/* This file is a part of MIR project.
   Copyright (C) 2020-2024 Vladimir Makarov <vmakarov.gcc@gmail.com>.
*/

/* In MIR generated code unsigned 32-bit values are zero extended, but
   when such a value is passed or returned it is sign extended (which
   is its riscv ABI representation).

   In theory we should pass vararg unsigned as sign extended according
   to ABI but gcc/clang va_arg code does the correct sign extension
   anyway.  So we can ignore sign extension for integer vararg
   arguments.  */

/* ??? TODO: o compressed c.j, c.beqz, c.bnez (is it worth because such jumps are
               usually forward and it requires complicated algorithm for relaxing)
             o rd = rs - const -> rd = rs + (-const) for using addi
             o implement generation for FLEN=128
             o save/set/restore sp/fp only if we use it
             o optimization: remove ext32 for branches generated in machinize if operands
                             are generated by short insns
*/

/* Backend of gen_assert: abort the process when CODE is zero (the checked
   condition is false); return normally for any non-zero CODE.  */
static void fancy_abort (int code) {
  if (code != 0) return;
  abort ();
}
#undef gen_assert
#define gen_assert(c) fancy_abort (c)

/* Marker macros (defined without a value) for the overflow-checking add and
   multiply insn families.  NOTE(review): presumably tested by the generic
   generator code to learn that this target expands these insns itself --
   confirm against the code that checks TARGET_EXPAND_*.  */
#define TARGET_EXPAND_ADDO
#define TARGET_EXPAND_ADDOS
#define TARGET_EXPAND_UADDO
#define TARGET_EXPAND_UADDOS
#define TARGET_EXPAND_MULO
#define TARGET_EXPAND_MULOS
#define TARGET_EXPAND_UMULO
#define TARGET_EXPAND_UMULOS

#include <limits.h>

#include "mir-riscv64.h"

/* Preference order of hard registers, read through TARGET_HARD_REG_ALLOC_ORDER:
   R8-R15 and F8-F15 come first, then the remaining integer registers, then the
   remaining FP registers.  check_hard_reg_alloc_order verifies that every hard
   register occurs here exactly once.
   NOTE(review): x8-x15/f8-f15 are the registers reachable by RVC compressed
   insns, which is presumably why they are preferred -- confirm.
   REP_SEP is temporarily defined as a comma; presumably it is the element
   separator used by the REP8 macro -- confirm against REP8's definition.  */
#define REP_SEP ,
static const MIR_reg_t hard_reg_alloc_order[] = {
  REP8 (HREG_EL, R8, R9, R10, R11, R12, R13, R14, R15),
  REP8 (HREG_EL, F8, F9, F10, F11, F12, F13, F14, F15),

  REP8 (HREG_EL, R0, R1, R2, R3, R4, R5, R6, R7),
  REP8 (HREG_EL, R16, R17, R18, R19, R20, R21, R22, R23),
  REP8 (HREG_EL, R24, R25, R26, R27, R28, R29, R30, R31),

  REP8 (HREG_EL, F0, F1, F2, F3, F4, F5, F6, F7),
  REP8 (HREG_EL, F16, F17, F18, F19, F20, F21, F22, F23),
  REP8 (HREG_EL, F24, F25, F26, F27, F28, F29, F30, F31),
};
#undef REP_SEP

/* Hard register used as the link register: RA_HARD_REG.  */
static const MIR_reg_t LINK_HARD_REG = RA_HARD_REG;

/* N-th hard register in allocation preference order (element N of
   hard_reg_alloc_order).  */
#define TARGET_HARD_REG_ALLOC_ORDER(n) hard_reg_alloc_order[n]

static void check_hard_reg_alloc_order (void) {
  int i;
  char check_p[F31_HARD_REG + 1];

  gen_assert (MAX_HARD_REG == F31_HARD_REG
              && sizeof (hard_reg_alloc_order) / sizeof (MIR_reg_t) == MAX_HARD_REG + 1);
  for (i = 0; i <= MAX_HARD_REG; i++) check_p[i] = FALSE;
  for (i = 0; i <= MAX_HARD_REG; i++) {
    gen_assert (!check_p[hard_reg_alloc_order[i]]);
    check_p[hard_reg_alloc_order[i]] = TRUE;
  }
  for (i = 0; i <= MAX_HARD_REG; i++) gen_assert (check_p[i]);
}

/* Return the N-th location counted from LOC, i.e. LOC + N.  TYPE is not
   used on this target.  */
static inline MIR_reg_t target_nth_loc (MIR_reg_t loc, MIR_type_t type MIR_UNUSED, int n) {
  MIR_reg_t nth_loc = loc + n;

  return nth_loc;
}

/* Return non-zero if HARD_REG is call used, i.e. it is not one of the
   registers this function treats as preserved: R8, R9, R18-R27 among the
   integer registers and F8, F9, F18-F27 among the FP registers.  For TYPE
   MIR_T_LD every FP register is reported as call used.  */
static inline int target_call_used_hard_reg_p (MIR_reg_t hard_reg, MIR_type_t type) {
  int preserved_p;

  assert (hard_reg <= MAX_HARD_REG);
  if (hard_reg <= R31_HARD_REG) { /* integer register */
    preserved_p = (hard_reg == R8_HARD_REG || hard_reg == R9_HARD_REG
                   || (R18_HARD_REG <= hard_reg && hard_reg <= R27_HARD_REG));
    return !preserved_p;
  }
  /* FP register: */
  if (type == MIR_T_LD) return 1;
  preserved_p = (hard_reg == F8_HARD_REG || hard_reg == F9_HARD_REG
                 || (F18_HARD_REG <= hard_reg && hard_reg <= F27_HARD_REG));
  return !preserved_p;
}

/* Stack layout (sp refers to the last reserved stack slot address)
   from higher address to lower address memory:

   | ...           |  prev func stack (start aligned to 16 bytes)
   |---------------|
   | gr save area  |  optional area for vararg func reg save area
   |               |  (int arg regs corresponding to varargs)
   |---------------|
   | saved regs    |  callee saved regs used in the func (known only after RA), rounded 16 bytes
   |---------------|
   | slots assigned|  can be absent for small functions (known only after RA), rounded 16 bytes
   |   to pseudos  |
   |---------------|
   |   previous    |  (sp right after call) 16-bytes setup in prolog, used only for vararg func or
   | stack start   |   args passed on stack to move args and to setup va_start on machinize pass
   |---------------|
   | RA            |  sp before prologue and after saving RA = start sp
   |---------------|
   | old FP        |  frame pointer for previous func stack frame; new FP refers to here
   |---------------|
   |  small aggreg |
   |  save area    |  optional
   |---------------|
   | alloca areas  |  optional
   |---------------|
   | slots for     |  dynamically allocated/deallocated by caller
   |  passing args |

   size of slots and saved regs is multiple of 16 bytes

 */

/* List of insn codes terminated by MIR_INSN_BOUND; it is empty for this
   target.  NOTE(review): judging by the name, these would be insns whose
   input and output operands must be the same -- confirm against the generic
   generator code that reads this table.  */
static const MIR_insn_code_t target_io_dup_op_insn_codes[] = {MIR_INSN_BOUND};

/* Return the extension insn code to apply to a value of integer TYPE
   narrower than 64 bits when it is passed as an argument (ARG_PASS_P is
   non-zero) or received as a call result (ARG_PASS_P is zero).  Return
   MIR_INVALID_INSN when TYPE needs no extension.  */
static MIR_insn_code_t get_ext_code (MIR_type_t type, int arg_pass_p) {
  if (type == MIR_T_I8) return MIR_EXT8;
  if (type == MIR_T_U8) return MIR_UEXT8;
  if (type == MIR_T_I16) return MIR_EXT16;
  if (type == MIR_T_U16) return MIR_UEXT16;
  if (type == MIR_T_I32) return MIR_EXT32;
  if (type == MIR_T_U32) {
    /* The ABI wants even unsigned 32-bit values sign extended when they are
       passed; inside MIR code they are kept zero extended.  */
    if (arg_pass_p) return MIR_EXT32;
    return MIR_UEXT32;
  }
  return MIR_INVALID_INSN;
}

/* Choose the hard register for the next call argument of ARG_TYPE.

   Non-vararg (VARARG_P is zero) MIR_T_F/MIR_T_D arguments use FA0..FA7 and
   advance *FP_ARG_NUM.  Everything else (including LD, BLK, RBLK and vararg
   FP values) uses A0..A7 and advances *INT_ARG_NUM; MIR_T_LD first rounds
   *INT_ARG_NUM up to an even number and consumes two registers.

   *MOV_CODE is set to the move insn code for the argument (MIR_FMOV,
   MIR_DMOV, MIR_LDMOV or MIR_MOV).  The counters are advanced even when no
   register is left; in that case MIR_NON_VAR is returned.  */
static MIR_reg_t get_arg_reg (MIR_type_t arg_type, int vararg_p, size_t *int_arg_num,
                              size_t *fp_arg_num, MIR_insn_code_t *mov_code) {
  MIR_reg_t reg;
  size_t num;

  if (!vararg_p && (arg_type == MIR_T_F || arg_type == MIR_T_D)) {
    num = *fp_arg_num;
    if (num < 8)
      reg = FA0_HARD_REG + num;
    else
      reg = MIR_NON_VAR;
    *fp_arg_num = num + 1;
    if (arg_type == MIR_T_F)
      *mov_code = MIR_FMOV;
    else
      *mov_code = MIR_DMOV;
    return reg;
  }
  /* Integer register class, including LD, BLK, RBLK: */
  num = *int_arg_num;
  if (arg_type == MIR_T_LD && num % 2 != 0) num++; /* LD starts at an even register */
  if (num < 8)
    reg = A0_HARD_REG + num;
  else
    reg = MIR_NON_VAR;
  if (arg_type == MIR_T_LD) {
    num += 2; /* LD occupies a register pair */
    *mov_code = MIR_LDMOV;
  } else {
    num += 1;
    *mov_code = MIR_MOV;
  }
  *int_arg_num = num;
  return reg;
}

/* Copy NWORDS 64-bit words from FROM to TO, one word at a time in ascending
   address order.  This is the function registered as builtin "mir.blk_mov".  */
static void mir_blk_mov (uint64_t *to, uint64_t *from, uint64_t nwords) {
  uint64_t word_num;

  for (word_num = 0; word_num < nwords; word_num++) {
    to[word_num] = from[word_num];
  }
}

/* Create the two-operand insn "CODE DST_OP, SRC_OP", add it before ANCHOR
   with gen_add_insn_before, and return the new insn.  */
static MIR_insn_t gen_mov (gen_ctx_t gen_ctx, MIR_insn_t anchor, MIR_insn_code_t code,
                           MIR_op_t dst_op, MIR_op_t src_op) {
  MIR_insn_t mov_insn;

  mov_insn = MIR_new_insn (gen_ctx->ctx, code, dst_op, src_op);
  gen_add_insn_before (gen_ctx, anchor, mov_insn);
  return mov_insn;
}

/* Names under which gen_blk_mov registers mir_blk_mov as a builtin function
   import (BLK_MOV) and its prototype (BLK_MOV_P).  */
static const char *BLK_MOV = "mir.blk_mov";
static const char *BLK_MOV_P = "mir.blk_mov.p";

/* Generate insns before ANCHOR which copy QWORDS 8-byte words from memory
   addressed by FROM_BASE_REG (displacement starts at 0) to memory at
   TO_BASE_HARD_REG + TO_DISP.

   Up to 16 words are copied by an inline sequence of load/store pairs
   through a temporary.  Bigger blocks are copied by a call of builtin
   mir_blk_mov (to, from, nwords).  That call uses A0, A1 and A2 for its own
   arguments, so SAVE_REGS says how many of these registers (counted from A0,
   at most three are handled) already hold values that must survive: they are
   saved in temporaries before the call and restored after it.  */
static void gen_blk_mov (gen_ctx_t gen_ctx, MIR_insn_t anchor, size_t to_disp,
                         MIR_reg_t to_base_hard_reg, MIR_reg_t from_base_reg, size_t qwords,
                         int save_regs) {
  size_t from_disp = 0;
  MIR_context_t ctx = gen_ctx->ctx;
  MIR_func_t func = curr_func_item->u.func;
  MIR_item_t proto_item, func_import_item;
  MIR_insn_t new_insn;
  MIR_op_t ops[5], freg_op, treg_op, treg_op2, treg_op3, treg_op4;

  treg_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
  treg_op2 = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
  if (qwords <= 16) {
    /* NOTE(review): treg_op2 = to_base_hard_reg + to_disp is computed here but
       the stores below address the destination directly through
       to_base_hard_reg and to_disp -- confirm whether treg_op2 was meant to
       be the store base.  */
    gen_mov (gen_ctx, anchor, MIR_MOV, treg_op2, MIR_new_int_op (ctx, to_disp));
    gen_add_insn_before (gen_ctx, anchor,
                         MIR_new_insn (gen_ctx->ctx, MIR_ADD, treg_op2, treg_op2,
                                       _MIR_new_var_op (ctx, to_base_hard_reg)));
    /* One load into treg_op and one store from it per word: */
    for (; qwords > 0; qwords--, to_disp += 8, from_disp += 8) {
      gen_mov (gen_ctx, anchor, MIR_MOV, treg_op,
               _MIR_new_var_mem_op (ctx, MIR_T_I64, from_disp, from_base_reg, MIR_NON_VAR, 1));
      gen_mov (gen_ctx, anchor, MIR_MOV,
               _MIR_new_var_mem_op (ctx, MIR_T_I64, to_disp, to_base_hard_reg, MIR_NON_VAR, 1),
               treg_op);
    }
    return;
  }
  treg_op3 = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
  /* Save arg regs: */
  if (save_regs > 0)
    gen_mov (gen_ctx, anchor, MIR_MOV, treg_op, _MIR_new_var_op (ctx, A0_HARD_REG));
  if (save_regs > 1)
    gen_mov (gen_ctx, anchor, MIR_MOV, treg_op2, _MIR_new_var_op (ctx, A1_HARD_REG));
  if (save_regs > 2)
    gen_mov (gen_ctx, anchor, MIR_MOV, treg_op3, _MIR_new_var_op (ctx, A2_HARD_REG));
  /* call blk move: */
  proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, BLK_MOV_P, 0, NULL, 3, MIR_T_I64,
                                   "to", MIR_T_I64, "from", MIR_T_I64, "nwords");
  func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, BLK_MOV, mir_blk_mov);
  freg_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
  new_insn = MIR_new_insn (ctx, MIR_MOV, freg_op, MIR_new_ref_op (ctx, func_import_item));
  gen_add_insn_before (gen_ctx, anchor, new_insn);
  /* A0 = destination address, A1 = source address, A2 = number of words: */
  treg_op4 = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
  gen_mov (gen_ctx, anchor, MIR_MOV, treg_op4, MIR_new_int_op (ctx, to_disp));
  gen_add_insn_before (gen_ctx, anchor,
                       MIR_new_insn (gen_ctx->ctx, MIR_ADD, _MIR_new_var_op (ctx, A0_HARD_REG),
                                     _MIR_new_var_op (ctx, to_base_hard_reg), treg_op4));
  gen_add_insn_before (gen_ctx, anchor,
                       MIR_new_insn (gen_ctx->ctx, MIR_ADD, _MIR_new_var_op (ctx, A1_HARD_REG),
                                     _MIR_new_var_op (ctx, from_base_reg),
                                     MIR_new_int_op (ctx, from_disp)));
  gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, A2_HARD_REG),
           MIR_new_int_op (ctx, qwords));
  ops[0] = MIR_new_ref_op (ctx, proto_item);
  ops[1] = freg_op;
  ops[2] = _MIR_new_var_op (ctx, A0_HARD_REG);
  ops[3] = _MIR_new_var_op (ctx, A1_HARD_REG);
  ops[4] = _MIR_new_var_op (ctx, A2_HARD_REG);
  new_insn = MIR_new_insn_arr (ctx, MIR_CALL, 5, ops);
  gen_add_insn_before (gen_ctx, anchor, new_insn);
  /* Restore arg regs (each into the same register it was saved from): */
  if (save_regs > 0)
    gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, A0_HARD_REG), treg_op);
  if (save_regs > 1)
    gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, A1_HARD_REG), treg_op2);
  if (save_regs > 2) /* A2, not R2: on RISC-V x2 is the stack pointer */
    gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, A2_HARD_REG), treg_op3);
}

/* Codes used as the first (integer) operand of the unspec insns which
   machinize_call creates to move a MIR_T_F (FMVXW_CODE) or MIR_T_D
   (FMVXD_CODE) argument value into an integer argument register.  */
#define FMVXW_CODE 0
#define FMVXD_CODE 1

/* Lower CALL_INSN to the target calling convention:

   o MIR_INLINE is turned into MIR_CALL and a non-register callee operand is
     first moved into a temporary;
   o a first pass over the arguments computes BLK_OFFSET, the size of the
     stack area taken by arguments passed on the stack, so that copies of
     blocks passed by address can be placed behind that area;
   o a second pass moves each argument into its argument hard register
     (A0.., FA0..) or into an SP-relative stack slot, extending small
     integers with get_ext_code; small blocks (at most 2 qwords) are passed
     by value in integer registers and/or on the stack, bigger ones are
     copied onto the stack by gen_blk_mov and their address is passed;
   o results are copied from A0/A1 or FA0/FA1 into the original result
     operands (and extended if needed);
   o if stack space is needed, SP is decreased before the argument setup
     and increased again after the call.

   The corresponding operands of CALL_INSN are replaced by the hard register
   or stack memory operands actually used.  */
static void machinize_call (gen_ctx_t gen_ctx, MIR_insn_t call_insn) {
  MIR_context_t ctx = gen_ctx->ctx;
  MIR_func_t func = curr_func_item->u.func;
  MIR_proto_t proto = call_insn->ops[0].u.ref->u.proto;
  int float_p;
  /* Operand 0 is the prototype, operand 1 the callee, then the results;
     START is the index of the first argument operand. */
  size_t nargs, nops = MIR_insn_nops (ctx, call_insn), start = proto->nres + 2;
  size_t int_arg_num = 0, fp_arg_num = 0, mem_size = 0, blk_offset = 0, qwords;
  MIR_type_t type, mem_type;
  MIR_op_mode_t mode;
  MIR_var_t *arg_vars = NULL;
  MIR_reg_t arg_reg;
  MIR_op_t arg_op, temp_op, arg_reg_op, ret_reg_op, mem_op, treg_op;
  MIR_insn_code_t new_insn_code, ext_code;
  MIR_insn_t new_insn, prev_insn, next_insn, ext_insn;
  /* Insn before the call at entry; stack-argument stores and the SP
     adjustment are inserted after it (or after CURR_PREV_CALL_INSN): */
  MIR_insn_t prev_call_insn = DLIST_PREV (MIR_insn_t, call_insn);
  MIR_insn_t curr_prev_call_insn = prev_call_insn;

  assert (__SIZEOF_LONG_DOUBLE__ == 16);
  if (call_insn->code == MIR_INLINE) call_insn->code = MIR_CALL;
  if (proto->args == NULL) {
    nargs = 0;
  } else {
    gen_assert (nops >= VARR_LENGTH (MIR_var_t, proto->args)
                && (proto->vararg_p || nops - start == VARR_LENGTH (MIR_var_t, proto->args)));
    nargs = VARR_LENGTH (MIR_var_t, proto->args);
    arg_vars = VARR_ADDR (MIR_var_t, proto->args);
  }
  if (call_insn->ops[1].mode != MIR_OP_VAR) {
    // ??? to optimize (can be immediate operand for func call)
    temp_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
    new_insn = MIR_new_insn (ctx, MIR_MOV, temp_op, call_insn->ops[1]);
    call_insn->ops[1] = temp_op;
    gen_add_insn_before (gen_ctx, call_insn, new_insn);
  }
  for (size_t i = start; i < nops; i++) { /* calculate offset for blk params */
    /* Argument type: from the prototype for declared args, otherwise (vararg)
       from the memory operand type or from the operand value mode. */
    if (i - start < nargs) {
      type = arg_vars[i - start].type;
    } else if (call_insn->ops[i].mode == MIR_OP_VAR_MEM) {
      type = call_insn->ops[i].u.mem.type;
      gen_assert (MIR_all_blk_type_p (type));
    } else {
      mode = call_insn->ops[i].value_mode;  // ??? smaller ints
      gen_assert (mode == MIR_OP_INT || mode == MIR_OP_UINT || mode == MIR_OP_FLOAT
                  || mode == MIR_OP_DOUBLE || mode == MIR_OP_LDOUBLE);
      if (mode == MIR_OP_FLOAT)
        (*MIR_get_error_func (ctx)) (MIR_call_op_error,
                                     "passing float variadic arg (should be passed as double)");
      type = mode == MIR_OP_DOUBLE ? MIR_T_D : mode == MIR_OP_LDOUBLE ? MIR_T_LD : MIR_T_I64;
    }
    gen_assert (!MIR_all_blk_type_p (type) || call_insn->ops[i].mode == MIR_OP_VAR_MEM);
    if ((MIR_T_I8 <= type && type <= MIR_T_U64) || type == MIR_T_P || type == MIR_T_LD
        || MIR_all_blk_type_p (type)) {
      /* The block size in bytes is kept in the displacement of the block memory operand. */
      if (MIR_blk_type_p (type) && (qwords = (call_insn->ops[i].u.mem.disp + 7) / 8) <= 2) {
        if (type == MIR_T_BLK + 1) int_arg_num = (int_arg_num + 1) / 2 * 2; /* Make even */
        /* Count only the words which do not fit into A0..A7: */
        if (int_arg_num + qwords > 8)
          blk_offset += (qwords - (int_arg_num + qwords == 9 ? 1 : 0)) * 8;
        int_arg_num += qwords;
      } else { /* blocks here are passed by address */
        if (type == MIR_T_LD) int_arg_num = (int_arg_num + 1) / 2 * 2; /* Make even */
        if (int_arg_num >= 8) blk_offset += 8 + (type == MIR_T_LD ? 8 : 0);
        int_arg_num++;
        if (type == MIR_T_LD) int_arg_num++;
      }
    } else if (type == MIR_T_F || type == MIR_T_D) {
      if (i - start >= nargs) { /* varargs are passed by int regs */
        if (int_arg_num >= 8) blk_offset += 8;
        int_arg_num++;
      } else {
        if (fp_arg_num >= 8) blk_offset += 8;
        fp_arg_num++;
      }
    } else {
      MIR_get_error_func (ctx) (MIR_call_op_error, "wrong type of arg value");
    }
  }
  blk_offset = (blk_offset + 15) / 16 * 16; /* align stack */
  /* Second pass: restart register counting and actually place the arguments. */
  int_arg_num = fp_arg_num = 0;
  for (size_t i = start; i < nops; i++) {
    arg_op = call_insn->ops[i];
    gen_assert (arg_op.mode == MIR_OP_VAR
                || (arg_op.mode == MIR_OP_VAR_MEM && MIR_all_blk_type_p (arg_op.u.mem.type)));
    if (i - start < nargs) {
      type = arg_vars[i - start].type;
    } else if (call_insn->ops[i].mode == MIR_OP_VAR_MEM) {
      type = call_insn->ops[i].u.mem.type;
      gen_assert (MIR_all_blk_type_p (type));
    } else {
      mode = call_insn->ops[i].value_mode;  // ??? smaller ints
      type = mode == MIR_OP_DOUBLE ? MIR_T_D : mode == MIR_OP_LDOUBLE ? MIR_T_LD : MIR_T_I64;
    }
    /* The extension insn is only created here; it is inserted below once it
       is known whether the argument goes to a register or to the stack. */
    ext_insn = NULL;
    if ((ext_code = get_ext_code (type, TRUE)) != MIR_INVALID_INSN) { /* extend arg if necessary */
      temp_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
      ext_insn = MIR_new_insn (ctx, ext_code, temp_op, arg_op);
      call_insn->ops[i] = arg_op = temp_op;
    }
    gen_assert (!MIR_all_blk_type_p (type)
                || (arg_op.mode == MIR_OP_VAR_MEM && arg_op.u.mem.disp >= 0
                    && arg_op.u.mem.index == MIR_NON_VAR));
    if (MIR_blk_type_p (type)) {
      qwords = (arg_op.u.mem.disp + 7) / 8;
      if (qwords <= 2) {
        /* Small block: pass its words by value, in A-registers while they
           last and in consecutive SP-relative slots afterwards. */
        arg_reg = A0_HARD_REG + int_arg_num;
        if (type == MIR_T_BLK + 1) int_arg_num = (int_arg_num + 1) / 2 * 2; /* Make even */
        for (size_t n = 0; n < qwords; n++) {
          if (int_arg_num < 8) {
            new_insn = MIR_new_insn (ctx, MIR_MOV, _MIR_new_var_op (ctx, A0_HARD_REG + int_arg_num),
                                     _MIR_new_var_mem_op (ctx, MIR_T_I64, n * 8, arg_op.u.mem.base,
                                                          MIR_NON_VAR, 1));
            gen_add_insn_before (gen_ctx, call_insn, new_insn);
            setup_call_hard_reg_args (gen_ctx, call_insn, A0_HARD_REG + int_arg_num);
            int_arg_num++;
          } else { /* put word on stack */
            treg_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
            new_insn = MIR_new_insn (ctx, MIR_MOV, treg_op,
                                     _MIR_new_var_mem_op (ctx, MIR_T_I64, n * 8, arg_op.u.mem.base,
                                                          MIR_NON_VAR, 1));
            gen_add_insn_before (gen_ctx, call_insn, new_insn);
            mem_op = _MIR_new_var_mem_op (ctx, MIR_T_I64, mem_size, SP_HARD_REG, MIR_NON_VAR, 1);
            new_insn = MIR_new_insn (ctx, MIR_MOV, mem_op, treg_op);
            gen_add_insn_before (gen_ctx, call_insn, new_insn);
            mem_size += 8;
          }
        }
        continue;
      }
      /* Put on stack and pass the address: */
      gen_blk_mov (gen_ctx, call_insn, blk_offset, SP_HARD_REG, arg_op.u.mem.base, qwords,
                   int_arg_num);
      /* From here on the argument is the address SP + blk_offset of the copy;
         it is passed below like an ordinary integer argument. */
      arg_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
      gen_assert (curr_prev_call_insn
                  != NULL); /* call_insn should not be 1st after simplification */
      new_insn = MIR_new_insn (gen_ctx->ctx, MIR_ADD, arg_op, _MIR_new_var_op (ctx, SP_HARD_REG),
                               MIR_new_int_op (ctx, blk_offset));
      gen_add_insn_after (gen_ctx, curr_prev_call_insn, new_insn);
      curr_prev_call_insn = DLIST_NEXT (MIR_insn_t, new_insn);
      blk_offset += qwords * 8;
    }
    if ((arg_reg
         = get_arg_reg (type, i - start >= nargs, &int_arg_num, &fp_arg_num, &new_insn_code))
        != MIR_NON_VAR) {
      /* put arguments to argument hard regs */
      if (ext_insn != NULL) gen_add_insn_before (gen_ctx, call_insn, ext_insn);
      arg_reg_op = _MIR_new_var_op (ctx, arg_reg);
      if (type != MIR_T_RBLK) {
        if (new_insn_code == MIR_MOV && (type == MIR_T_F || type == MIR_T_D)) {
          /* FP value assigned to an integer arg register: use the unspec
             insn with FMVXW_CODE/FMVXD_CODE instead of a plain move. */
          new_insn
            = _MIR_new_unspec_insn (ctx, 3,
                                    MIR_new_int_op (ctx, type == MIR_T_F ? FMVXW_CODE : FMVXD_CODE),
                                    arg_reg_op, arg_op);
        } else {
          new_insn = MIR_new_insn (ctx, new_insn_code, arg_reg_op, arg_op);
        }
      } else {
        /* RBLK: pass the block base address in the register and keep an RBLK
           memory operand based on that register in the call insn. */
        assert (arg_op.mode == MIR_OP_VAR_MEM);
        new_insn
          = MIR_new_insn (ctx, MIR_MOV, arg_reg_op, _MIR_new_var_op (ctx, arg_op.u.mem.base));
        arg_reg_op
          = _MIR_new_var_mem_op (ctx, MIR_T_RBLK, arg_op.u.mem.disp, arg_reg, MIR_NON_VAR, 1);
      }
      gen_add_insn_before (gen_ctx, call_insn, new_insn);
      call_insn->ops[i] = arg_reg_op;
      if (type == MIR_T_LD) /* long double is passed in 2 int hard regs: */
        setup_call_hard_reg_args (gen_ctx, call_insn, arg_reg + 1);
    } else { /* put arguments on the stack */
      mem_type = type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD ? type : MIR_T_I64;
      new_insn_code = (type == MIR_T_F    ? MIR_FMOV
                       : type == MIR_T_D  ? MIR_DMOV
                       : type == MIR_T_LD ? MIR_LDMOV
                                          : MIR_MOV);
      mem_op = _MIR_new_var_mem_op (ctx, mem_type, mem_size, SP_HARD_REG, MIR_NON_VAR, 1);
      if (type != MIR_T_RBLK) {
        new_insn = MIR_new_insn (ctx, new_insn_code, mem_op, arg_op);
      } else {
        assert (arg_op.mode == MIR_OP_VAR_MEM);
        new_insn
          = MIR_new_insn (ctx, new_insn_code, mem_op, _MIR_new_var_op (ctx, arg_op.u.mem.base));
      }
      gen_assert (curr_prev_call_insn != NULL); /* call should not be 1st after simplification */
      /* The store is inserted right after CURR_PREV_CALL_INSN, i.e. before
         the insns already added in front of the call. */
      MIR_insert_insn_after (ctx, curr_func_item, curr_prev_call_insn, new_insn);
      prev_insn = DLIST_PREV (MIR_insn_t, new_insn);
      next_insn = DLIST_NEXT (MIR_insn_t, new_insn);
      create_new_bb_insns (gen_ctx, prev_insn, next_insn, call_insn);
      call_insn->ops[i] = mem_op;
      mem_size += type == MIR_T_LD ? 16 : 8;
      /* Inserted after the same insn as the store, hence in front of the store: */
      if (ext_insn != NULL) gen_add_insn_after (gen_ctx, curr_prev_call_insn, ext_insn);
    }
  }
  /* If block copies were placed on the stack, the whole stack argument area
     ends at the (aligned) end of the block area. */
  blk_offset = (blk_offset + 15) / 16 * 16;
  if (blk_offset != 0) mem_size = blk_offset;
  /* Results: the counters are reused for result registers (A0/A1 and FA0/FA1). */
  int_arg_num = fp_arg_num = 0;
  for (size_t i = 0; i < proto->nres; i++) {
    ret_reg_op = call_insn->ops[i + 2];
    gen_assert (ret_reg_op.mode == MIR_OP_VAR);
    type = proto->res_types[i];
    float_p = type == MIR_T_F || type == MIR_T_D;
    if (float_p && fp_arg_num < 2) {
      new_insn = MIR_new_insn (ctx, type == MIR_T_F ? MIR_FMOV : MIR_DMOV, ret_reg_op,
                               _MIR_new_var_op (ctx, FA0_HARD_REG + fp_arg_num));
      fp_arg_num++;
    } else if (type == MIR_T_LD && int_arg_num < 2) {
      new_insn = MIR_new_insn (ctx, MIR_LDMOV, ret_reg_op,
                               _MIR_new_var_op (ctx, A0_HARD_REG + int_arg_num));
      int_arg_num += 2;
    } else if (!float_p && int_arg_num < 2) {
      new_insn
        = MIR_new_insn (ctx, MIR_MOV, ret_reg_op, _MIR_new_var_op (ctx, A0_HARD_REG + int_arg_num));
      int_arg_num++;
    } else {
      (*MIR_get_error_func (ctx)) (MIR_ret_error,
                                   "riscv can not handle this combination of return values");
    }
    MIR_insert_insn_after (ctx, curr_func_item, call_insn, new_insn);
    /* The call insn now defines the result hard register: */
    call_insn->ops[i + 2] = new_insn->ops[1];
    if ((ext_code = get_ext_code (type, FALSE)) != MIR_INVALID_INSN) {
      MIR_insert_insn_after (ctx, curr_func_item, new_insn,
                             MIR_new_insn (ctx, ext_code, ret_reg_op, ret_reg_op));
      new_insn = DLIST_NEXT (MIR_insn_t, new_insn);
    }
    create_new_bb_insns (gen_ctx, call_insn, DLIST_NEXT (MIR_insn_t, new_insn), call_insn);
  }
  if (mem_size != 0) { /* allocate/deallocate stack for args passed on stack */
    temp_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
    mem_size = (mem_size + 15) / 16 * 16; /* make it of several 16 bytes */
    /* Both insns are inserted right after PREV_CALL_INSN, the ADD first, so
       the resulting order is: temp = -mem_size; sp = sp + temp. */
    new_insn = MIR_new_insn (ctx, MIR_ADD, _MIR_new_var_op (ctx, SP_HARD_REG),
                             _MIR_new_var_op (ctx, SP_HARD_REG), temp_op);
    MIR_insert_insn_after (ctx, curr_func_item, prev_call_insn, new_insn);
    next_insn = DLIST_NEXT (MIR_insn_t, new_insn);
    new_insn = MIR_new_insn (ctx, MIR_MOV, temp_op, MIR_new_int_op (ctx, -(int64_t) mem_size));
    MIR_insert_insn_after (ctx, curr_func_item, prev_call_insn, new_insn);
    create_new_bb_insns (gen_ctx, prev_call_insn, next_insn, call_insn);
    /* Right after the call: temp = mem_size; sp = sp + temp. */
    temp_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
    new_insn = MIR_new_insn (ctx, MIR_MOV, temp_op, MIR_new_int_op (ctx, mem_size));
    MIR_insert_insn_after (ctx, curr_func_item, call_insn, new_insn);
    next_insn = DLIST_NEXT (MIR_insn_t, new_insn);
    new_insn = MIR_new_insn (ctx, MIR_ADD, _MIR_new_var_op (ctx, SP_HARD_REG),
                             _MIR_new_var_op (ctx, SP_HARD_REG), temp_op);
    MIR_insert_insn_before (ctx, curr_func_item, next_insn, new_insn);
    create_new_bb_insns (gen_ctx, call_insn, next_insn, call_insn);
  }
}

/* Builtin for MIR_I2LD: convert signed 64-bit integer I to long double.
   I2LD and I2LD_P are the import and prototype names used by get_builtin.  */
static long double mir_i2ld (int64_t i) {
  long double res = (long double) i;

  return res;
}
static const char *I2LD = "mir.i2ld";
static const char *I2LD_P = "mir.i2ld.p";

/* Builtin for MIR_UI2LD: convert unsigned 64-bit integer I to long double.
   UI2LD and UI2LD_P are the import and prototype names used by get_builtin.  */
static long double mir_ui2ld (uint64_t i) {
  long double res = (long double) i;

  return res;
}
static const char *UI2LD = "mir.ui2ld";
static const char *UI2LD_P = "mir.ui2ld.p";

/* Builtin for MIR_F2LD: convert float F to long double.
   F2LD and F2LD_P are the import and prototype names used by get_builtin.  */
static long double mir_f2ld (float f) {
  long double res = (long double) f;

  return res;
}
static const char *F2LD = "mir.f2ld";
static const char *F2LD_P = "mir.f2ld.p";

/* Builtin for MIR_D2LD: convert double D to long double.
   D2LD and D2LD_P are the import and prototype names used by get_builtin.  */
static long double mir_d2ld (double d) {
  long double res = (long double) d;

  return res;
}
static const char *D2LD = "mir.d2ld";
static const char *D2LD_P = "mir.d2ld.p";

/* Builtin for MIR_LD2I: convert long double LD to a signed 64-bit integer
   (C conversion, i.e. truncation toward zero).
   LD2I and LD2I_P are the import and prototype names used by get_builtin.  */
static int64_t mir_ld2i (long double ld) {
  int64_t res = (int64_t) ld;

  return res;
}
static const char *LD2I = "mir.ld2i";
static const char *LD2I_P = "mir.ld2i.p";

/* Builtin for MIR_LD2F: convert long double LD to float.
   LD2F and LD2F_P are the import and prototype names used by get_builtin.  */
static float mir_ld2f (long double ld) {
  float res = (float) ld;

  return res;
}
static const char *LD2F = "mir.ld2f";
static const char *LD2F_P = "mir.ld2f.p";

/* Builtin for MIR_LD2D: convert long double LD to double.
   LD2D and LD2D_P are the import and prototype names used by get_builtin.  */
static double mir_ld2d (long double ld) {
  double res = (double) ld;

  return res;
}
static const char *LD2D = "mir.ld2d";
static const char *LD2D_P = "mir.ld2d.p";

/* Builtin for MIR_LDADD: long double sum D1 + D2.
   LDADD and LDADD_P are the import and prototype names used by get_builtin.  */
static long double mir_ldadd (long double d1, long double d2) {
  long double sum = d1 + d2;

  return sum;
}
static const char *LDADD = "mir.ldadd";
static const char *LDADD_P = "mir.ldadd.p";

/* Builtin for MIR_LDSUB: long double difference D1 - D2.
   LDSUB and LDSUB_P are the import and prototype names used by get_builtin.  */
static long double mir_ldsub (long double d1, long double d2) {
  long double diff = d1 - d2;

  return diff;
}
static const char *LDSUB = "mir.ldsub";
static const char *LDSUB_P = "mir.ldsub.p";

/* Builtin for MIR_LDMUL: long double product D1 * D2.
   LDMUL and LDMUL_P are the import and prototype names used by get_builtin.  */
static long double mir_ldmul (long double d1, long double d2) {
  long double prod = d1 * d2;

  return prod;
}
static const char *LDMUL = "mir.ldmul";
static const char *LDMUL_P = "mir.ldmul.p";

/* Builtin for MIR_LDDIV: long double quotient D1 / D2.
   LDDIV and LDDIV_P are the import and prototype names used by get_builtin.  */
static long double mir_lddiv (long double d1, long double d2) {
  long double quot = d1 / d2;

  return quot;
}
static const char *LDDIV = "mir.lddiv";
static const char *LDDIV_P = "mir.lddiv.p";

/* Builtin for MIR_LDNEG: long double negation -D.
   LDNEG and LDNEG_P are the import and prototype names used by get_builtin.  */
static long double mir_ldneg (long double d) {
  long double neg = -d;

  return neg;
}
static const char *LDNEG = "mir.ldneg";
static const char *LDNEG_P = "mir.ldneg.p";

/* Import and prototype names which get_builtin uses for va_arg_builtin
   (MIR_VA_ARG) and va_block_arg_builtin (MIR_VA_BLOCK_ARG).  */
static const char *VA_ARG_P = "mir.va_arg.p";
static const char *VA_ARG = "mir.va_arg";
static const char *VA_BLOCK_ARG_P = "mir.va_block_arg.p";
static const char *VA_BLOCK_ARG = "mir.va_block_arg";

/* Builtin for MIR_LDEQ: return 1 if D1 == D2, otherwise 0.
   LDEQ and LDEQ_P are the import and prototype names used by get_builtin.  */
static int64_t mir_ldeq (long double d1, long double d2) {
  return d1 == d2 ? 1 : 0;
}
static const char *LDEQ = "mir.ldeq";
static const char *LDEQ_P = "mir.ldeq.p";

/* Builtin for MIR_LDNE: return 1 if D1 != D2, otherwise 0.
   LDNE and LDNE_P are the import and prototype names used by get_builtin.  */
static int64_t mir_ldne (long double d1, long double d2) {
  return d1 != d2 ? 1 : 0;
}
static const char *LDNE = "mir.ldne";
static const char *LDNE_P = "mir.ldne.p";

/* Builtin for MIR_LDLT: return 1 if D1 < D2, otherwise 0.
   LDLT and LDLT_P are the import and prototype names used by get_builtin.  */
static int64_t mir_ldlt (long double d1, long double d2) {
  return d1 < d2 ? 1 : 0;
}
static const char *LDLT = "mir.ldlt";
static const char *LDLT_P = "mir.ldlt.p";

/* Builtin for MIR_LDGE: return 1 if D1 >= D2, otherwise 0.
   LDGE and LDGE_P are the import and prototype names used by get_builtin.  */
static int64_t mir_ldge (long double d1, long double d2) {
  return d1 >= d2 ? 1 : 0;
}
static const char *LDGE = "mir.ldge";
static const char *LDGE_P = "mir.ldge.p";

/* Builtin for MIR_LDGT: return 1 if D1 > D2, otherwise 0.
   LDGT and LDGT_P are the import and prototype names used by get_builtin.  */
static int64_t mir_ldgt (long double d1, long double d2) {
  return d1 > d2 ? 1 : 0;
}
static const char *LDGT = "mir.ldgt";
static const char *LDGT_P = "mir.ldgt.p";

/* Builtin for MIR_LDLE: return 1 if D1 <= D2, otherwise 0.
   LDLE and LDLE_P are the import and prototype names used by get_builtin.  */
static int64_t mir_ldle (long double d1, long double d2) {
  return d1 <= d2 ? 1 : 0;
}
static const char *LDLE = "mir.ldle";
static const char *LDLE_P = "mir.ldle.p";

static int get_builtin (gen_ctx_t gen_ctx, MIR_insn_code_t code, MIR_item_t *proto_item,
                        MIR_item_t *func_import_item) {
  MIR_context_t ctx = gen_ctx->ctx;
  MIR_type_t res_type;

  *func_import_item = *proto_item = NULL; /* to remove uninitialized warning */
  switch (code) {
  case MIR_I2LD:
    res_type = MIR_T_LD;
    *proto_item
      = _MIR_builtin_proto (ctx, curr_func_item->module, I2LD_P, 1, &res_type, 1, MIR_T_I64, "v");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, I2LD, mir_i2ld);
    return 1;
  case MIR_UI2LD:
    res_type = MIR_T_LD;
    *proto_item
      = _MIR_builtin_proto (ctx, curr_func_item->module, UI2LD_P, 1, &res_type, 1, MIR_T_I64, "v");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, UI2LD, mir_ui2ld);
    return 1;
  case MIR_F2LD:
    res_type = MIR_T_LD;
    *proto_item
      = _MIR_builtin_proto (ctx, curr_func_item->module, F2LD_P, 1, &res_type, 1, MIR_T_F, "v");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, F2LD, mir_f2ld);
    return 1;
  case MIR_D2LD:
    res_type = MIR_T_LD;
    *proto_item
      = _MIR_builtin_proto (ctx, curr_func_item->module, D2LD_P, 1, &res_type, 1, MIR_T_D, "v");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, D2LD, mir_d2ld);
    return 1;
  case MIR_LD2I:
    res_type = MIR_T_I64;
    *proto_item
      = _MIR_builtin_proto (ctx, curr_func_item->module, LD2I_P, 1, &res_type, 1, MIR_T_LD, "v");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LD2I, mir_ld2i);
    return 1;
  case MIR_LD2F:
    res_type = MIR_T_F;
    *proto_item
      = _MIR_builtin_proto (ctx, curr_func_item->module, LD2F_P, 1, &res_type, 1, MIR_T_LD, "v");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LD2F, mir_ld2f);
    return 1;
  case MIR_LD2D:
    res_type = MIR_T_D;
    *proto_item
      = _MIR_builtin_proto (ctx, curr_func_item->module, LD2D_P, 1, &res_type, 1, MIR_T_LD, "v");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LD2D, mir_ld2d);
    return 1;
  case MIR_LDADD:
    res_type = MIR_T_LD;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDADD_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDADD, mir_ldadd);
    return 2;
  case MIR_LDSUB:
    res_type = MIR_T_LD;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDSUB_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDSUB, mir_ldsub);
    return 2;
  case MIR_LDMUL:
    res_type = MIR_T_LD;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDMUL_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDMUL, mir_ldmul);
    return 2;
  case MIR_LDDIV:
    res_type = MIR_T_LD;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDDIV_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDDIV, mir_lddiv);
    return 2;
  case MIR_LDNEG:
    res_type = MIR_T_LD;
    *proto_item
      = _MIR_builtin_proto (ctx, curr_func_item->module, LDNEG_P, 1, &res_type, 1, MIR_T_LD, "d");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDNEG, mir_ldneg);
    return 1;
  case MIR_LDEQ:
    res_type = MIR_T_I64;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDEQ_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDEQ, mir_ldeq);
    return 2;
  case MIR_LDNE:
    res_type = MIR_T_I64;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDNE_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDNE, mir_ldne);
    return 2;
  case MIR_LDLT:
    res_type = MIR_T_I64;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDLT_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDLT, mir_ldlt);
    return 2;
  case MIR_LDGE:
    res_type = MIR_T_I64;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDGE_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDGE, mir_ldge);
    return 2;
  case MIR_LDGT:
    res_type = MIR_T_I64;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDGT_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDGT, mir_ldgt);
    return 2;
  case MIR_LDLE:
    res_type = MIR_T_I64;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, LDLE_P, 1, &res_type, 2,
                                      MIR_T_LD, "d1", MIR_T_LD, "d2");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, LDLE, mir_ldle);
    return 2;
  case MIR_VA_ARG:
    res_type = MIR_T_I64;
    *proto_item = _MIR_builtin_proto (ctx, curr_func_item->module, VA_ARG_P, 1, &res_type, 2,
                                      MIR_T_I64, "va", MIR_T_I64, "type");
    *func_import_item = _MIR_builtin_func (ctx, curr_func_item->module, VA_ARG, va_arg_builtin);
    return 2;
  case MIR_VA_BLOCK_ARG:
    *proto_item
      = _MIR_builtin_proto (ctx, curr_func_item->module, VA_BLOCK_ARG_P, 0, NULL, 4, MIR_T_I64,
                            "res", MIR_T_I64, "va", MIR_T_I64, "size", MIR_T_I64, "ncase");
    *func_import_item
      = _MIR_builtin_func (ctx, curr_func_item->module, VA_BLOCK_ARG, va_block_arg_builtin);
    return 4;
  default: return 0;
  }
}

/* A pair START/NUM describing a run of patterns.  NOTE(review): presumably
   the first index and the count of the patterns of one insn code inside
   pattern_indexes -- confirm against the code filling insn_pattern_info.  */
struct insn_pattern_info {
  int start, num;
};

typedef struct insn_pattern_info insn_pattern_info_t;
DEF_VARR (insn_pattern_info_t);

/* Kind of a code location referring to a label (stored in struct label_ref).
   NOTE(review): the names presumably mirror the riscv insn (sequence) holding the
   reference -- confirm against the code which patches label references.  */
enum branch_type {
  BRANCH,
  JAL,
  AUIPC,
  AUIPC_JALR,
};
/* Description of one reference to a label (or to an absolute jump address) which is
   collected in the label_refs array of target_ctx.
   NOTE(review): the field notes below are inferred from names only -- confirm against
   the code which creates and resolves label references.  */
struct label_ref {
  /* presumably nonzero when the reference needs an absolute address -- TODO confirm */
  int abs_addr_p;
  /* kind of the referring insn, see enum branch_type */
  enum branch_type branch_type;
  /* presumably displacement of the referring location in the result code -- TODO confirm */
  size_t label_val_disp;
  union {
    MIR_label_t label;
    void *jump_addr; /* absolute addr for BBV */
  } u;
};

typedef struct label_ref label_ref_t;
DEF_VARR (label_ref_t);

/* Description of one reference to a 64-bit constant, collected in the const_refs array
   of target_ctx.
   NOTE(review): presumably `val` is the constant and `const_addr_disp` the displacement
   in the result code of the insns which must be given the constant's address -- confirm
   against the code handling the "I" pattern/replacement.  */
struct const_ref {
  uint64_t val;
  size_t const_addr_disp;
};

typedef struct const_ref const_ref_t;
DEF_VARR (const_ref_t);

/* riscv64 specific state of the generator; it is reached as gen_ctx->target_ctx.  */
struct target_ctx {
  /* Flags describing the function being generated (all are set in target_machinize): */
  unsigned char alloca_p;         /* an MIR_ALLOCA insn was seen */
  unsigned char block_arg_func_p; /* some argument is read through the saved caller sp */
  unsigned char leaf_p;           /* no call insn was seen */
  unsigned char add_nops;
  /* Value of the integer argument register counter after all declared args were processed
     (it is used for va_start and for the vararg register save area): */
  uint32_t non_vararg_int_args_num;
  /* Size in bytes of the area below fp which keeps small block args passed in registers: */
  size_t small_aggregate_save_area;
  MIR_insn_t temp_jump;
  const char *temp_jump_replacement;
  VARR (int) * pattern_indexes;
  VARR (insn_pattern_info_t) * insn_pattern_info;
  VARR (uint8_t) * result_code;
  VARR (label_ref_t) * label_refs;
  VARR (const_ref_t) * const_refs;
  VARR (uint64_t) * abs_address_locs;
  VARR (MIR_code_reloc_t) * relocs;
};

/* Shorthands for the members of struct target_ctx.  Each one expands to an access through
   a variable named gen_ctx, so they can be used only where such a variable is in scope.  */
#define alloca_p gen_ctx->target_ctx->alloca_p
#define block_arg_func_p gen_ctx->target_ctx->block_arg_func_p
#define leaf_p gen_ctx->target_ctx->leaf_p
#define add_nops gen_ctx->target_ctx->add_nops
#define non_vararg_int_args_num gen_ctx->target_ctx->non_vararg_int_args_num
#define small_aggregate_save_area gen_ctx->target_ctx->small_aggregate_save_area
#define temp_jump gen_ctx->target_ctx->temp_jump
#define temp_jump_replacement gen_ctx->target_ctx->temp_jump_replacement
#define pattern_indexes gen_ctx->target_ctx->pattern_indexes
#define insn_pattern_info gen_ctx->target_ctx->insn_pattern_info
#define result_code gen_ctx->target_ctx->result_code
#define label_refs gen_ctx->target_ctx->label_refs
#define const_refs gen_ctx->target_ctx->const_refs
#define abs_address_locs gen_ctx->target_ctx->abs_address_locs
#define relocs gen_ctx->target_ctx->relocs

/* Return the displacement (relative to the stack slot base register) of stack slot SLOT,
   where SLOT is 0, 1, ...  Each slot takes 8 bytes.  The slots start after the 16 bytes
   used for saved fp and ra, or after 32 bytes when the function is a vararg one or reads
   args through the caller sp (the prologue then reserves 16 more bytes and keeps the
   caller sp at displacement 16).  */
static MIR_disp_t target_get_stack_slot_offset (gen_ctx_t gen_ctx, MIR_type_t type MIR_UNUSED,
                                                MIR_reg_t slot) {
  size_t first_slot_disp = 16;

  if (curr_func_item->u.func->vararg_p || block_arg_func_p) first_slot_disp = 32;
  return (MIR_disp_t) slot * 8 + first_slot_disp;
}

/* Return the hard register used as the base for addressing stack slots: it is always the
   frame pointer (see target_get_stack_slot_offset for the displacements).  */
static MIR_reg_t target_get_stack_slot_base_reg (gen_ctx_t gen_ctx MIR_UNUSED) {
  return FP_HARD_REG;
}

/* Return nonzero if OFFSET can be used as a displacement of a memory of TYPE, i.e. it
   fits into the signed 12-bit range [-2048, 2047].  For MIR_T_LD memory OFFSET + 8 must
   also be not greater than 2047.  */
static int target_valid_mem_offset_p (gen_ctx_t gen_ctx MIR_UNUSED, MIR_type_t type,
                                      MIR_disp_t offset) {
  MIR_disp_t last_disp = offset;

  if (type == MIR_T_LD) last_disp += 8; /* displacement of the second 8 bytes */
  return offset >= -(1 << 11) && last_disp <= (1 << 11) - 1;
}

/* Transform insns of the current function to follow the riscv64 conventions used by this
   generator:
     o at the function start, add moves of incoming arguments (from argument hard regs,
       from the caller stack addressed through t0, or, for block args of at most 2 qwords
       passed in regs, through a save area below fp) into the argument variables;
     o add explicit 32-bit extensions of operands of short comparisons and branches;
     o change FP and long double compare-and-branch insns into a comparison and MIR_BT;
     o change insns implemented by builtins (see get_builtin) into calls;
     o expand MIR_VA_START, remove MIR_VA_END, process calls by machinize_call;
     o move returned values into the return hard regs before MIR_RET.
   As a side effect block_arg_func_p, small_aggregate_save_area, non_vararg_int_args_num,
   alloca_p, and leaf_p are set up for the function.  */
static void target_machinize (gen_ctx_t gen_ctx) {
  MIR_context_t ctx = gen_ctx->ctx;
  MIR_func_t func;
  MIR_type_t type, mem_type, res_type;
  MIR_insn_code_t code, ext_code, new_insn_code;
  MIR_insn_t insn, next_insn, new_insn, anchor;
  MIR_var_t var;
  MIR_reg_t ret_reg, arg_reg;
  MIR_op_t ret_reg_op, arg_reg_op, mem_op, temp_op, treg_op;
  size_t i, int_arg_num, fp_arg_num, mem_size, qwords;

  assert (curr_func_item->item_type == MIR_func_item);
  func = curr_func_item->u.func;
  block_arg_func_p = FALSE;
  /* All argument moves are inserted before the original first insn: */
  anchor = DLIST_HEAD (MIR_insn_t, func->insns);
  small_aggregate_save_area = 0;
  /* mem_size is the displacement (from the caller sp) of the next argument on the stack: */
  for (i = int_arg_num = fp_arg_num = mem_size = 0; i < func->nargs; i++) {
    /* Argument extensions is already done in simplify */
    /* Prologue: generate arg_var = hard_reg|stack mem|stack addr ... */
    var = VARR_GET (MIR_var_t, func->vars, i);
    type = var.type;
    /* Block args of at most 2 qwords are passed by value (in regs and/or on the stack): */
    if (MIR_blk_type_p (type) && (qwords = (var.size + 7) / 8) <= 2) {
      if (type == MIR_T_BLK + 1) int_arg_num = (int_arg_num + 1) / 2 * 2; /* Make even */
      if (int_arg_num < 8) {
        MIR_insn_code_t mov_code1 = MIR_MOV;
        MIR_type_t mem_type1 = MIR_T_I64;
        MIR_reg_t base_arg_reg = A0_HARD_REG;
        size_t arg_reg_num = int_arg_num;

        /* Reserve space below fp and make the arg variable point to it: */
        small_aggregate_save_area += qwords * 8;
        gen_assert (small_aggregate_save_area < (1 << 11));
        new_insn = MIR_new_insn (ctx, MIR_ADD, _MIR_new_var_op (ctx, i + MAX_HARD_REG + 1),
                                 _MIR_new_var_op (ctx, FP_HARD_REG),
                                 MIR_new_int_op (ctx, -(int64_t) small_aggregate_save_area));
        gen_add_insn_before (gen_ctx, anchor, new_insn);
        if (qwords == 0) continue;
        /* Store the 1st qword from its argument register: */
        gen_mov (gen_ctx, anchor, mov_code1,
                 _MIR_new_var_mem_op (ctx, mem_type1, 0, i + MAX_HARD_REG + 1, MIR_NON_VAR, 1),
                 _MIR_new_var_op (ctx, base_arg_reg + arg_reg_num));
        if (qwords == 2) {
          MIR_insn_code_t mov_code2 = MIR_MOV;
          MIR_type_t mem_type2 = MIR_T_I64;
          MIR_disp_t disp = 8;
          if (arg_reg_num < 7) {
            /* The 2nd qword is in the next argument register: */
            gen_mov (gen_ctx, anchor, mov_code2,
                     _MIR_new_var_mem_op (ctx, mem_type2, disp, i + MAX_HARD_REG + 1, MIR_NON_VAR,
                                          1),
                     _MIR_new_var_op (ctx, base_arg_reg + arg_reg_num + 1));
          } else {
            /* The 1st qword is in a7 and the 2nd one is on the caller stack: copy it
               through t1.  */
            if (!block_arg_func_p) { /* t0 = prev sp */
              block_arg_func_p = TRUE;
              gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, T0_HARD_REG),
                       _MIR_new_var_mem_op (ctx, MIR_T_I64, 16, FP_HARD_REG, MIR_NON_VAR, 1));
            }
            treg_op = _MIR_new_var_op (ctx, T1_HARD_REG);
            gen_mov (gen_ctx, anchor, mov_code2, treg_op,
                     _MIR_new_var_mem_op (ctx, mem_type2, mem_size, T0_HARD_REG, MIR_NON_VAR, 1));
            gen_mov (gen_ctx, anchor, mov_code2,
                     _MIR_new_var_mem_op (ctx, mem_type2, disp, i + MAX_HARD_REG + 1, MIR_NON_VAR,
                                          1),
                     treg_op);
            mem_size += 8;
          }
        }
        int_arg_num += qwords;
      } else {                   /* fully on stack -- use the address: */
        if (!block_arg_func_p) { /* t0 = prev sp */
          block_arg_func_p = TRUE;
          gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, T0_HARD_REG),
                   _MIR_new_var_mem_op (ctx, MIR_T_I64, 16, FP_HARD_REG, MIR_NON_VAR, 1));
        }
        new_insn
          = MIR_new_insn (ctx, MIR_ADD, _MIR_new_var_op (ctx, i + MAX_HARD_REG + 1),
                          _MIR_new_var_op (ctx, T0_HARD_REG), MIR_new_int_op (ctx, mem_size));
        gen_add_insn_before (gen_ctx, anchor, new_insn);
        mem_size += qwords * 8;
      }
      continue;
    }
    /* All other args: get_arg_reg returns MIR_NON_VAR if the arg is not in a register.  */
    arg_reg = get_arg_reg (type, FALSE, &int_arg_num, &fp_arg_num, &new_insn_code);
    if (arg_reg != MIR_NON_VAR) {
      arg_reg_op = _MIR_new_var_op (ctx, arg_reg);
      gen_mov (gen_ctx, anchor, new_insn_code, _MIR_new_var_op (ctx, i + MAX_HARD_REG + 1),
               arg_reg_op);
    } else { /* arg is on the stack or blk address is on the stack: */
      if (!block_arg_func_p) {
        block_arg_func_p = TRUE;
        gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, T0_HARD_REG),
                 _MIR_new_var_mem_op (ctx, MIR_T_I64, 16, FP_HARD_REG, MIR_NON_VAR, 1));
      }
      mem_type = type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD ? type : MIR_T_I64;
      new_insn_code = (type == MIR_T_F    ? MIR_FMOV
                       : type == MIR_T_D  ? MIR_DMOV
                       : type == MIR_T_LD ? MIR_LDMOV
                                          : MIR_MOV);
      mem_op = _MIR_new_var_mem_op (ctx, mem_type, mem_size, T0_HARD_REG, MIR_NON_VAR, 1);
      gen_mov (gen_ctx, anchor, new_insn_code, _MIR_new_var_op (ctx, i + MAX_HARD_REG + 1), mem_op);
      mem_size += type == MIR_T_LD ? 16 : 8;
    }
  }
  non_vararg_int_args_num = int_arg_num;
  alloca_p = FALSE;
  leaf_p = TRUE;
  /* Process all insns; next_insn is taken in advance as the current insn can be deleted: */
  for (insn = DLIST_HEAD (MIR_insn_t, func->insns); insn != NULL; insn = next_insn) {
    MIR_item_t proto_item, func_import_item;
    int nargs;

    next_insn = DLIST_NEXT (MIR_insn_t, insn);
    code = insn->code;
    switch (code) {
    /* FP and long double branches: `code` becomes the corresponding comparison code and
       the insn is split into the comparison and MIR_BT after the switch.  */
    case MIR_FBEQ: code = MIR_FEQ; break;
    case MIR_FBNE: code = MIR_FNE; break;
    case MIR_FBLT: code = MIR_FLT; break;
    case MIR_FBGE: code = MIR_FGE; break;
    case MIR_FBGT: code = MIR_FGT; break;
    case MIR_FBLE: code = MIR_FLE; break;
    case MIR_DBEQ: code = MIR_DEQ; break;
    case MIR_DBNE: code = MIR_DNE; break;
    case MIR_DBLT: code = MIR_DLT; break;
    case MIR_DBGE: code = MIR_DGE; break;
    case MIR_DBGT: code = MIR_DGT; break;
    case MIR_DBLE: code = MIR_DLE; break;
    case MIR_LDBEQ: code = MIR_LDEQ; break;
    case MIR_LDBNE: code = MIR_LDNE; break;
    case MIR_LDBLT: code = MIR_LDLT; break;
    case MIR_LDBGE: code = MIR_LDGE; break;
    case MIR_LDBGT: code = MIR_LDGT; break;
    case MIR_LDBLE: code = MIR_LDLE; break;
    /* Short (32-bit) comparisons and branches: extend both compared operands into new
       temporaries (sign extension here, zero extension for the unsigned codes below).  */
    case MIR_EQS:
    case MIR_NES:
    case MIR_BEQS:
    case MIR_BNES:
    case MIR_LTS:
    case MIR_LES:
    case MIR_GTS:
    case MIR_GES:
    case MIR_BLTS:
    case MIR_BLES:
    case MIR_BGTS:
    case MIR_BGES: ext_code = MIR_EXT32; goto short_cmp;
    case MIR_ULTS:
    case MIR_ULES:
    case MIR_UGTS:
    case MIR_UGES:
    case MIR_UBLTS:
    case MIR_UBLES:
    case MIR_UBGTS:
    case MIR_UBGES:
      ext_code = MIR_UEXT32;
    short_cmp:
      temp_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
      new_insn = MIR_new_insn (ctx, ext_code, temp_op, insn->ops[1]);
      gen_add_insn_before (gen_ctx, insn, new_insn);
      treg_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
      new_insn = MIR_new_insn (ctx, ext_code, treg_op, insn->ops[2]);
      gen_add_insn_before (gen_ctx, insn, new_insn);
      insn->ops[1] = temp_op;
      insn->ops[2] = treg_op;
      break;
    default: break;
    }
    if (code != insn->code) {
      /* It was an FP or long double branch: temp = op1 <cmp> op2; bt label, temp.  The
         comparison becomes the current insn (it still can be changed into a builtin call
         below) and the new MIR_BT is processed on the next iteration.  */
      temp_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
      new_insn = MIR_new_insn (ctx, code, temp_op, insn->ops[1], insn->ops[2]);
      gen_add_insn_before (gen_ctx, insn, new_insn);
      next_insn = MIR_new_insn (ctx, MIR_BT, insn->ops[0], temp_op);
      gen_add_insn_after (gen_ctx, new_insn, next_insn);
      gen_delete_insn (gen_ctx, insn);
      insn = new_insn;
    }
    if ((nargs = get_builtin (gen_ctx, code, &proto_item, &func_import_item)) > 0) {
      if (code == MIR_VA_ARG || code == MIR_VA_BLOCK_ARG) {
        /* Use a builtin func call:
           mov func_reg, func ref; [mov reg3, type;] call proto, func_reg, res_reg, va_reg,
           reg3 */
        MIR_op_t ops[6], func_reg_op, reg_op3;
        MIR_op_t res_reg_op = insn->ops[0], va_reg_op = insn->ops[1], op3 = insn->ops[2];

        assert (res_reg_op.mode == MIR_OP_VAR && va_reg_op.mode == MIR_OP_VAR
                && op3.mode == (code == MIR_VA_ARG ? MIR_OP_VAR_MEM : MIR_OP_VAR));
        func_reg_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
        reg_op3 = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
        /* The loop continues from the first added insn, so the new call is processed too: */
        next_insn = new_insn
          = MIR_new_insn (ctx, MIR_MOV, func_reg_op, MIR_new_ref_op (ctx, func_import_item));
        gen_add_insn_before (gen_ctx, insn, new_insn);
        if (code == MIR_VA_ARG) {
          /* The 3rd op of va_arg is a memory: only its type is passed to the builtin.  */
          new_insn
            = MIR_new_insn (ctx, MIR_MOV, reg_op3, MIR_new_int_op (ctx, (int64_t) op3.u.mem.type));
          op3 = reg_op3;
          gen_add_insn_before (gen_ctx, insn, new_insn);
        }
        ops[0] = MIR_new_ref_op (ctx, proto_item);
        ops[1] = func_reg_op;
        ops[2] = res_reg_op;
        ops[3] = va_reg_op;
        ops[4] = op3;
        if (code == MIR_VA_BLOCK_ARG) ops[5] = insn->ops[3];
        new_insn = MIR_new_insn_arr (ctx, MIR_CALL, code == MIR_VA_ARG ? 5 : 6, ops);
        gen_add_insn_before (gen_ctx, insn, new_insn);
        gen_delete_insn (gen_ctx, insn);
      } else { /* Use builtin: mov freg, func ref; call proto, freg, res_reg, op_reg[, op_reg2] */
        MIR_op_t freg_op, res_reg_op = insn->ops[0], op_reg_op = insn->ops[1], ops[5];

        assert (res_reg_op.mode == MIR_OP_VAR && op_reg_op.mode == MIR_OP_VAR);
        freg_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
        /* The loop continues from the first added insn, so the new call is processed too: */
        next_insn = new_insn
          = MIR_new_insn (ctx, MIR_MOV, freg_op, MIR_new_ref_op (ctx, func_import_item));
        gen_add_insn_before (gen_ctx, insn, new_insn);
        ops[0] = MIR_new_ref_op (ctx, proto_item);
        ops[1] = freg_op;
        ops[2] = res_reg_op;
        ops[3] = op_reg_op;
        if (nargs == 2) ops[4] = insn->ops[2];
        new_insn = MIR_new_insn_arr (ctx, MIR_CALL, nargs + 3, ops);
        gen_add_insn_before (gen_ctx, insn, new_insn);
        gen_delete_insn (gen_ctx, insn);
      }
    } else if (code == MIR_VA_START) {
      MIR_op_t prev_sp_op = _MIR_new_var_op (ctx, gen_new_temp_reg (gen_ctx, MIR_T_I64, func));
      MIR_op_t va_op = insn->ops[0];
      MIR_reg_t va_reg;

      assert (func->vararg_p && va_op.mode == MIR_OP_VAR);
      va_reg = va_op.u.reg;
      /* Insns can be not simplified as soon as they match a machine insn.  */
      /* __stack: prev_sp = mem64[fp + 16] */
      gen_mov (gen_ctx, insn, MIR_MOV, prev_sp_op,
               _MIR_new_var_mem_op (ctx, MIR_T_I64, 16, FP_HARD_REG, MIR_NON_VAR, 1));
      /* Adjust the address by (non_vararg_int_args_num - 8) * 8 bytes; the adjustment is
         negative when fewer than 8 int arg regs are used by the declared args: */
      if (non_vararg_int_args_num != 8)
        gen_add_insn_before (gen_ctx, insn,
                             MIR_new_insn (ctx, MIR_ADD, prev_sp_op, prev_sp_op,
                                           MIR_new_int_op (ctx,
                                                           ((uint64_t) non_vararg_int_args_num - 8)
                                                             * 8)));
      /* Store the address into the va_list object: */
      gen_mov (gen_ctx, insn, MIR_MOV,
               _MIR_new_var_mem_op (ctx, MIR_T_I64, 0, va_reg, MIR_NON_VAR, 1), prev_sp_op);
      gen_delete_insn (gen_ctx, insn);
    } else if (code == MIR_VA_END) { /* do nothing */
      gen_delete_insn (gen_ctx, insn);
    } else if (MIR_call_code_p (code)) {
      machinize_call (gen_ctx, insn);
      leaf_p = FALSE;
    } else if (code == MIR_ALLOCA) {
      alloca_p = TRUE;
    } else if (code == MIR_RET) {
      /* In simplify we already transformed code for one return insn
         and added extension insn (if any).  */
      uint32_t n_xregs = 0, n_fpregs = 0;

      assert (func->nres == MIR_insn_nops (ctx, insn));
      /* At most 2 FP results are returned in fa0, fa1 and other results occupy at most 2
         int regs a0, a1 (a long double occupies 2 int regs); otherwise it is an error.  */
      for (i = 0; i < func->nres; i++) {
        assert (insn->ops[i].mode == MIR_OP_VAR);
        res_type = func->res_types[i];
        if ((res_type == MIR_T_F || res_type == MIR_T_D) && n_fpregs < 2) {
          new_insn_code = res_type == MIR_T_F ? MIR_FMOV : MIR_DMOV;
          ret_reg = FA0_HARD_REG + n_fpregs++;
        } else if (n_xregs < 2) {
          new_insn_code = res_type == MIR_T_LD ? MIR_LDMOV : MIR_MOV;
          ret_reg = A0_HARD_REG + n_xregs++;
          if (res_type == MIR_T_LD) n_xregs++;
        } else {
          (*MIR_get_error_func (ctx)) (MIR_ret_error,
                                       "riscv can not handle this combination of return values");
        }
        ret_reg_op = _MIR_new_var_op (ctx, ret_reg);
        /* We should return unsigned 32-bit integer with sign extension according to ABI: */
        gen_mov (gen_ctx, insn, res_type == MIR_T_U32 ? MIR_EXT32 : new_insn_code, ret_reg_op,
                 insn->ops[i]);
        insn->ops[i] = ret_reg_op;
      }
    }
  }
}

/* Add a move before ANCHOR which stores integer hard register HARD_REG into the 64-bit
   memory with base register BASE and displacement DISP.  */
static void isave (gen_ctx_t gen_ctx, MIR_insn_t anchor, int disp, MIR_reg_t base,
                   MIR_reg_t hard_reg) {
  gen_mov (gen_ctx, anchor, MIR_MOV,
           _MIR_new_var_mem_op (gen_ctx->ctx, MIR_T_I64, disp, base, MIR_NON_VAR, 1),
           _MIR_new_var_op (gen_ctx->ctx, hard_reg));
}

/* Choose a base register for addressing the area of saved hard registers which starts at
   displacement *OFFSET from fp.  If even MAX_HARD_REG 8-byte slots after *OFFSET can be
   addressed with displacements less than 2^11, fp is returned and *OFFSET is not changed.
   Otherwise insns setting up t2 = fp + *OFFSET are added before ANCHOR, *OFFSET becomes
   zero, and t2 is returned.  */
static MIR_reg_t get_base_reg_offset_for_saved_regs (gen_ctx_t gen_ctx, MIR_insn_t anchor,
                                                     size_t *offset) {
  MIR_context_t ctx = gen_ctx->ctx;
  MIR_reg_t base_reg = FP_HARD_REG;

  if (*offset + MAX_HARD_REG * 8 >= (1 << 11)) {
    MIR_insn_t new_insn;

    base_reg = T2_HARD_REG;
    /* t2 = offset */
    gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, base_reg),
             MIR_new_int_op (ctx, *offset));
    /* t2 = t2 + fp */
    new_insn = MIR_new_insn (ctx, MIR_ADD, _MIR_new_var_op (ctx, base_reg),
                             _MIR_new_var_op (ctx, base_reg), _MIR_new_var_op (ctx, FP_HARD_REG));
    gen_add_insn_before (gen_ctx, anchor, new_insn);
    *offset = 0;
  }
  return base_reg;
}

/* Add the prologue at the start of the current function and the epilogue before its
   return insn (the last MIR_RET or MIR_JRET in the insn list).

   USED_HARD_REGS is the set of hard registers used in the function and STACK_SLOTS_NUM
   is the number of 8-byte stack slots it needs.  Nothing is generated for a leaf function
   which needs no frame at all.

   The frame created here is (fp == sp right after the frame allocation):
     fp + 0                 saved fp
     fp + 8                 saved ra (not saved when func->jret_p)
     fp + 16                caller sp (16 bytes are reserved, only for vararg functions and
                            functions reading args through the caller sp)
     fp + 16 or 32 ...      stack slots
     ...                    saved callee-saved hard regs
     top of the frame       save area of vararg int regs a<n>..a7 (only for vararg
                            functions), so it is adjacent to the args passed on the stack
   The area for small block args (small_aggregate_save_area) is allocated below fp by
   an additional sp decrement.  */
static void target_make_prolog_epilog (gen_ctx_t gen_ctx, bitmap_t used_hard_regs,
                                       size_t stack_slots_num) {
  MIR_context_t ctx = gen_ctx->ctx;
  MIR_func_t func;
  MIR_insn_t anchor, new_insn;
  MIR_op_t sp_reg_op, fp_reg_op, treg_op, treg_op2;
  MIR_reg_t base_reg;
  int64_t start;
  int save_prev_stack_p;
  size_t i, offset, frame_size, frame_size_after_saved_regs, saved_iregs_num, saved_fregs_num;

  assert (curr_func_item->item_type == MIR_func_item);
  func = curr_func_item->u.func;
  /* Count callee-saved regs to save (fp is always saved separately): */
  for (i = saved_iregs_num = saved_fregs_num = 0; i <= MAX_HARD_REG; i++)
    if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)
        && i != FP_HARD_REG) {
      if (i < F0_HARD_REG)
        saved_iregs_num++;
      else
        saved_fregs_num++;
    }
  /* No frame is needed at all: */
  if (leaf_p && !alloca_p && saved_iregs_num == 0 && saved_fregs_num == 0 && !func->vararg_p
      && stack_slots_num == 0 && !block_arg_func_p && small_aggregate_save_area == 0
      && !bitmap_bit_p (used_hard_regs, RA_HARD_REG))
    return;
  sp_reg_op = _MIR_new_var_op (ctx, SP_HARD_REG);
  fp_reg_op = _MIR_new_var_op (ctx, FP_HARD_REG);
  /* Prologue: */
  anchor = DLIST_HEAD (MIR_insn_t, func->insns);
  frame_size = 0;
  if (func->vararg_p && non_vararg_int_args_num < 8) /* space for vararg int regs (a<n>..a7): */
    frame_size = (8 - non_vararg_int_args_num) * 8;
  for (i = 0; i <= MAX_HARD_REG; i++)
    if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i))
      frame_size += 8;
  if (frame_size % 16 != 0) frame_size = (frame_size + 15) / 16 * 16;
  frame_size_after_saved_regs = frame_size;
  frame_size += stack_slots_num * 8;
  if (frame_size % 16 != 0) frame_size = (frame_size + 15) / 16 * 16;
  save_prev_stack_p = func->vararg_p || block_arg_func_p;
  treg_op = _MIR_new_var_op (ctx, T1_HARD_REG);
  if (save_prev_stack_p) { /* the 1st insn: putting stack pointer into T1: */
    gen_mov (gen_ctx, anchor, MIR_MOV, treg_op, sp_reg_op);
    frame_size += 16;
  }
  frame_size += 16; /* ra/fp */
  if (frame_size < (1 << 11)) {
    new_insn = MIR_new_insn (ctx, MIR_ADD, sp_reg_op, sp_reg_op,
                             MIR_new_int_op (ctx, -(int64_t) frame_size));
  } else {
    treg_op2 = _MIR_new_var_op (ctx, T2_HARD_REG);
    new_insn = MIR_new_insn (ctx, MIR_MOV, treg_op2, MIR_new_int_op (ctx, -(int64_t) frame_size));
    gen_add_insn_before (gen_ctx, anchor, new_insn); /* t = -frame_size */
    new_insn = MIR_new_insn (ctx, MIR_ADD, sp_reg_op, sp_reg_op, treg_op2);
  }
  gen_add_insn_before (gen_ctx, anchor, new_insn); /* sp = sp - (frame_size|t) */
  if (save_prev_stack_p)                           /* save prev sp value which is in T1: */
    gen_mov (gen_ctx, anchor, MIR_MOV,
             _MIR_new_var_mem_op (ctx, MIR_T_I64, 16, SP_HARD_REG, MIR_NON_VAR, 1),
             treg_op); /* mem[sp + 16] = t1 */
  if (!func->jret_p)
    gen_mov (gen_ctx, anchor, MIR_MOV,
             _MIR_new_var_mem_op (ctx, MIR_T_I64, 8, SP_HARD_REG, MIR_NON_VAR, 1),
             _MIR_new_var_op (ctx, LINK_HARD_REG)); /* mem[sp + 8] = ra */
  gen_mov (gen_ctx, anchor, MIR_MOV,
           _MIR_new_var_mem_op (ctx, MIR_T_I64, 0, SP_HARD_REG, MIR_NON_VAR, 1),
           _MIR_new_var_op (ctx, FP_HARD_REG));             /* mem[sp] = fp */
  gen_mov (gen_ctx, anchor, MIR_MOV, fp_reg_op, sp_reg_op); /* fp = sp */
  if (func->vararg_p && non_vararg_int_args_num < 8) {      /* save vararg int regs: */
    MIR_reg_t base = SP_HARD_REG;
    int reg_save_area_size = 8 * (8 - non_vararg_int_args_num);

    start = (int64_t) frame_size - reg_save_area_size;
    if (start + reg_save_area_size >= (1 << 11)) {
      /* The displacements from sp can be too big for a store insn.  Use t1 as the base
         instead: t1 is free here as the caller sp kept in it has been already saved.  The
         base must be the address of the save area, i.e. sp + start, not just the offset.  */
      new_insn = MIR_new_insn (ctx, MIR_MOV, treg_op, MIR_new_int_op (ctx, start));
      gen_add_insn_before (gen_ctx, anchor, new_insn); /* t = frame_size - reg_save_area_size */
      new_insn = MIR_new_insn (ctx, MIR_ADD, treg_op, treg_op, sp_reg_op);
      gen_add_insn_before (gen_ctx, anchor, new_insn); /* t = t + sp */
      start = 0;
      base = T1_HARD_REG;
    }
    for (MIR_reg_t r = non_vararg_int_args_num + A0_HARD_REG; r <= A7_HARD_REG; r++, start += 8)
      isave (gen_ctx, anchor, start, base, r);
  }
  /* Saving callee saved hard registers: */
  offset = frame_size - frame_size_after_saved_regs;
  base_reg = get_base_reg_offset_for_saved_regs (gen_ctx, anchor, &offset);
  for (i = 0; i <= MAX_HARD_REG; i++)
    if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)
        && i != FP_HARD_REG) {
      if (i < F0_HARD_REG) {
        gen_assert (offset < (1 << 11));
        gen_mov (gen_ctx, anchor, MIR_MOV,
                 _MIR_new_var_mem_op (ctx, MIR_T_I64, offset, base_reg, MIR_NON_VAR, 1),
                 _MIR_new_var_op (ctx, i));
        offset += 8;
      } else {
        // if (offset % 16 != 0) offset = (offset + 15) / 16 * 16;
        gen_assert (offset < (1 << 11));
        new_insn = gen_mov (gen_ctx, anchor, MIR_DMOV,
                            _MIR_new_var_mem_op (ctx, MIR_T_D, offset, base_reg, MIR_NON_VAR, 1),
                            _MIR_new_var_op (ctx, i));
        offset += 8;
      }
    }
  if (small_aggregate_save_area != 0) {
    if (small_aggregate_save_area % 16 != 0)
      small_aggregate_save_area = (small_aggregate_save_area + 15) / 16 * 16;
    new_insn = MIR_new_insn (ctx, MIR_ADD, sp_reg_op, sp_reg_op,
                             MIR_new_int_op (ctx, -(int64_t) small_aggregate_save_area));
    gen_add_insn_before (gen_ctx, anchor, new_insn); /* sp -= <small aggr save area size> */
  }
  /* Epilogue: */
  for (anchor = DLIST_TAIL (MIR_insn_t, func->insns); anchor != NULL;
       anchor = DLIST_PREV (MIR_insn_t, anchor))
    if (anchor->code == MIR_RET || anchor->code == MIR_JRET) break;
  if (anchor == NULL) return;
  /* Restoring hard registers: */
  offset = frame_size - frame_size_after_saved_regs;
  base_reg = get_base_reg_offset_for_saved_regs (gen_ctx, anchor, &offset);
  for (i = 0; i <= MAX_HARD_REG; i++)
    if (!target_call_used_hard_reg_p (i, MIR_T_UNDEF) && bitmap_bit_p (used_hard_regs, i)
        && i != FP_HARD_REG) {
      if (i < F0_HARD_REG) {
        gen_assert (offset < (1 << 11));
        gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, i),
                 _MIR_new_var_mem_op (ctx, MIR_T_I64, offset, base_reg, MIR_NON_VAR, 1));
        offset += 8;
      } else {
        gen_assert (offset < (1 << 11));
        new_insn = gen_mov (gen_ctx, anchor, MIR_DMOV, _MIR_new_var_op (ctx, i),
                            _MIR_new_var_mem_op (ctx, MIR_T_D, offset, base_reg, MIR_NON_VAR, 1));
        offset += 8;
      }
    }
  /* Restore ra, sp, fp */
  if (!func->jret_p)
    gen_mov (gen_ctx, anchor, MIR_MOV, _MIR_new_var_op (ctx, LINK_HARD_REG),
             _MIR_new_var_mem_op (ctx, MIR_T_I64, 8, FP_HARD_REG, MIR_NON_VAR, 1));
  if (frame_size < (1 << 11)) {
    new_insn = MIR_new_insn (ctx, MIR_ADD, sp_reg_op, fp_reg_op, MIR_new_int_op (ctx, frame_size));
  } else {
    new_insn = MIR_new_insn (ctx, MIR_MOV, treg_op, MIR_new_int_op (ctx, frame_size));
    gen_add_insn_before (gen_ctx, anchor, new_insn); /* t = frame_size */
    new_insn = MIR_new_insn (ctx, MIR_ADD, sp_reg_op, fp_reg_op, treg_op);
  }
  gen_add_insn_before (gen_ctx, anchor, new_insn); /* sp = fp + (frame_size|t) */
  /* fp is restored last as it is the base of the previous accesses: */
  gen_mov (gen_ctx, anchor, MIR_MOV, fp_reg_op,
           _MIR_new_var_mem_op (ctx, MIR_T_I64, 0, FP_HARD_REG, MIR_NON_VAR, 1));
}

/* 32-bit insn formats:
|31             25|24 20|19 15|14        12|11             7|6     0|
|funct7           | rs2 | rs1 |funct3      | rd             | opcode|  :R-type
|imm[11:0]              | rs1 |funct3      | rd             | opcode|  :I-type
|imm[11:5]        | rs2 | rs1 |funct3      |imm[4:0]        | opcode|  :S-type
|imm[31:12]                                | rd             | opcode|  :U-type
|imm[12]imm[10-5] | rs2 | rs1 |funct3      |imm[4-1]imm[11] | opcode|  :B-type
|imm[20]imm[10:1]imm[11]imm[19-12]         | rd             | opcode|  :J-type

16-bit insns:
Format Meaning		      |15 14 13|12|11 10|9 8 7|6 5|4 3 2|1 0|
CR     Register	              | funct4    | rd/rs1    |    rs2  | op|
CI     Immediate	      | funct3 |im| rd/rs1    |    imm  | op|
CSS    Stack-relative Store   | funct3 |    imm       |    rs2  | op|
CIW    Wide Immediate	      | funct3 |    imm           | rd' | op|
CL     Load		      | funct3 |   imm  | rs1'|imm| rd' | op|
CS     Store		      | funct3 |   imm  | rs1'|imm| rs2'| op|
CS     Reg insn		      | funct6          | rs1'|fn2| rs2'| op|
CB     Branch		      | funct3 | offset | rs1'| offset  | op|
CJ     Jump                   | funct3 |    jump target         | op|

RVC Register Number (rs1',rs2',rd')    000  001  010  011  100  101  110  111
Integer Register Number		       x8   x9   x10  x11  x12  x13  x14  x15
Integer Register ABI Name	       s0   s1   a0   a1   a2   a3   a4   a5
Floating-Point Register Number	       f8   f9   f10  f11  f12  f13  f14  f15
Floating-Point Register ABI Name       fs0  fs1  fa0  fa1  fa2  fa3  fa4  fa5

*/

/* Description of one machine insn alternative: the MIR insn code, a string describing
   which operands the alternative accepts, and a string describing how to encode the
   matched insn into machine code.  */
struct pattern {
  MIR_insn_code_t code;
  /* Pattern elements:
     blank - ignore
     X - match everything
     $ - finish successfully matching
     r - register
     rp - register but sp
     C - compressed register
     h[0-63] - hard register with given number
     c<number> - immediate integer <number>

       memory with immediate offset:
     m[0-3] - int (signed or unsigned) type memory of size 8,16,32,64-bits
     ms[0-3] - signed int type memory of size 8,16,32,64-bits
     mu[0-3] - unsigned int type memory of size 8,16,32,64-bits
       sign extended 12-bit offset

     mc[s]2[s],mc3[s] - (signed) int memory of size 32 or 64-bits with compressed
         base register and 5-bit unsigned displacement scaled by 4 or 8
         or stack reg as base and 6-bit unsigned displacement scaled by 4 or 8

       memory with immediate offset:
     mf - memory of float
     md - memory of double
     mld - memory of long double (whose disp can be increased by 8)
       sign extended 12-bit offset

     mcd[s] - double memory with compressed base register (or stack register)
              and 6-bit unsigned displacement scaled by 8

     i -- 2nd or 3rd immediate op for arithmetic insn (12-bit signed)
     j -- as i but -j should be also i (it means excluding minimal 12-bit signed) and only 3rd op
     ju -- as j but rounded to 16 first and only 2nd op
     iu -- 32-bit signed immediate for arithmetic insn with zero 12 bits as 2nd op
     ia -- any 32-bit signed immediate as 2nd op
     I --  any 64-bit immediate
     s --  immediate shift (5 bits) as 3rd op
     S --  immediate shift (6 bits) as 3rd op
     Sp --  nonzero immediate shift (6 bits) as 3rd op
     l --  label as the 1st or 2nd op which can be represented by signed 13-bit pc offset
     L --  label as the 1st or 2nd op which can be represented by unsigned 20-bit pc offset
     U --  label used in LADDR as 2nd op which can be represented by signed 32-bit pc offset

     k -- 2nd or 3rd immediate op for arithmetic insn (6-bit signed)
     kp -- nonzero 2nd or 3rd immediate op for arithmetic insn (6-bit signed)
     ks -- nonzero 2nd or 3rd immediate op for arithmetic insn (9-bit signed) multiple of 16
     ku -- 18-bit signed immediate for arithmetic insn with zero low 12-bits as 2nd op
     kw -- nonzero scaled by 4 8-bit unsigned immediate
     jus -- imm rounded to 16 first and considered as ks

     Remember we have no float or (long) double immediate at this stage. They are represented
     by a reference to data item.  */

  const char *pattern;
  /* Replacement elements (if insn size is not mentioned it is a 32-bit insn):
     blank - ignore
     ; - insn separation

     Ohex - opcode [6..0]
     Fhex - funct3 (or round mode rm) [14..12]
     fhex - funct7 [31..25]
     ghex - funct7 w/o 1 bit [31..26]

     ohex - 16-bit insn opcode [1..0] (opcodec)
     ahex - 16-bit insn funct3 [15..13] (funct3c)
     bhex - 16-bit insn funct4 [15..12] (funct4c)
     chex - 16-bit insn funct6 [15..10] (funct6c)
     dhex - 16-bit insn funct2 [6..5] (funct2c)
     ehex - 16-bit insn functb2 [11..10] (funct2bc)

     rd[0-2] - put n-th operand register into rd field [11..7] (16- and 32-bit insns)
     rs[0-2] - put n-th operand register into rs1 field [19..15]
     rS[0-2] - put n-th operand register into rs2 field [24..20]

     rt[0-2] - put n-th operand register into rs2 field [6..2] 16-bit insns
     ru[0-2] - put n-th operand register into rd'/rs1' field [9..7] 16-bit insns
     rv[0-2] - put n-th operand register into rs2' field [4..2] 16-bit insns

     h(d,s,S,t,u,v)<one or two hex digits> - hardware register with given number in
                                              rd,rs1,rs2,rd',rs1',rs2' field

     m = 1st or 2nd operand is (8-,16-,32-,64-bit) mem with base and signed disp

     ml = 1st or 2nd operand for load is mem with base (rs1), signed imm12 disp [31..20]
     ms = 1st or 2nd operand for store is mem with base (rs1), signed imm12 disp [31..25,11..7]

     mc[2-3],mcd = 1st or 2nd operand is mem of given type with base (rs1[9..7])
                   and scaled unsigned imm5 disp [12..10,6..5]
     mc[2-3]s[s],mcds[s] = 1st or 2nd operand is mem with stack reg as base and
                   scaled unsigned imm6 disp [12,6..2], last `s` means store disp [12..7]

     i -- 2nd or 3rd arithmetic op 12-bit immediate [31..20]
     j -- 3rd arithmetic op 12-bit immediate [31..20] with opposite sign
     ju -- j but j round up to 16 first and used only as 2nd operand
     iu -- 2nd arithmetic op immediate [31..12]
     ih -- 20-bit upper part [31..12] of 32-bit signed 2nd op
     il -- 12-bit lower part [31..20] of 32-bit signed 2nd op
     I -- 20-bit upper part [31..12] of 32-bit signed pc-relative address of 64 bit
          constant (2nd op) in the 1st word and 12-bit lower part [31..20] in the 2nd word
     s --  immediate shift [24-20]
     S --  immediate shift [25-20]
     Sp --  immediate shift [12,6:2], 16-bit insn
     shex --  immediate shift value [24-20]
     Shex --  immediate shift value [25-20]
     i[-]hex -- i with given value
     iuhex -- 20-bit immediate [31..12]
     T - 12-bit immediate which is 16 + alignment of the insn addr + 8 to 8 == (0,2,4,6)

     k - immediate in field [12, 6-2], 16-bit insn
     k[-]hex -- k with given value
     ku - [16:12] of immediate value in [12, 6-2], 16-bit insn
     ks - [9,4,6,8-7,5] of immediate value in [12, 6-2], 16-bit insn

     l -- operand-label as signed 13-bit offset ([12|10:5] as [31:25] and [4:1|11] as [11:7]),
          remember address of any insn is even
     L -- operand-label as signed 21-bit offset ([20|10:1|11|19:12] as [31:12])
     U -- operand-label as signed 32-bit offset in auipc,addi

     lc -- operand-label as signed 9-bit offset ([12..10,6-2]), 16-bit insn
     Lc -- operand-label as signed 12-bit offset ([12-2]), 16-bit insn
     [0-3] - an operand matching n-th operand (n should be less than given operand number)
  */
  const char *replacement;
};

/* Controls (through `#if COMPRESS_INSNS`) whether patterns of 16-bit compressed insns are
   included in the pattern table.  It follows the compiler predefined macro
   __riscv_compressed; when that macro is not defined the `#if` condition is false.  */
#define COMPRESS_INSNS __riscv_compressed

static const struct pattern patterns[] = {
#if COMPRESS_INSNS
  {MIR_MOV, "r r", "o2 b8 rd0 rt1"}, /* c.mv rd,rs2 */
#endif
  {MIR_MOV, "r r", "O13 F0 rd0 rs1 i0"}, /* addi rd,rs1,0 */
#if COMPRESS_INSNS
  {MIR_MOV, "C mc3", "o0 a3 rv0 mc3"},   /* c.ld rd',mc3 */
  {MIR_MOV, "r mc3s", "o2 a3 rd0 mc3s"}, /* c.ldsp rd,mc3s */
#endif
  {MIR_MOV, "r m3", "O3 F3 rd0 ml"}, /* ld rd,m */
#if COMPRESS_INSNS
  {MIR_MOV, "mc3 C", "o0 a7 rv1 mc3"},    /* c.sd rd',mc3 */
  {MIR_MOV, "mc3s r", "o2 a7 rt1 mc3ss"}, /* c.sdsp rd,mc3s */
#endif
  {MIR_MOV, "m3 r", "O23 F3 rS1 ms"}, /* sd rs2,m */

#if COMPRESS_INSNS
  {MIR_MOV, "C mcs2", "o0 a2 rv0 mc2"},   /* c.lw rd',mc2 */
  {MIR_MOV, "r mcs2s", "o2 a2 rd0 mc2s"}, /* c.lwsp rd,mc2s */
#endif
  {MIR_MOV, "r ms2", "O3 F2 rd0 ml"}, /* lw rd,m */
  {MIR_MOV, "r mu2", "O3 F6 rd0 ml"}, /* lwu rd,m */
#if COMPRESS_INSNS
  {MIR_MOV, "mc2 C", "o0 a6 rv1 mc2"},    /* c.sw mc2,rd' */
  {MIR_MOV, "mc2s r", "o2 a6 rt1 mc2ss"}, /* c.swsp rd,mc2s */
#endif
  {MIR_MOV, "m2 r", "O23 F2 rS1 ms"}, /* sw rs2,m */

  {MIR_MOV, "r ms1", "O3 F1 rd0 ml"}, /* lh rd,m */
  {MIR_MOV, "r mu1", "O3 F5 rd0 ml"}, /* lhu rd,m */
  {MIR_MOV, "m1 r", "O23 F1 rS1 ms"}, /* sh rs2,m */

  {MIR_MOV, "r ms0", "O3 F0 rd0 ml"}, /* lb rd,m */
  {MIR_MOV, "r mu0", "O3 F4 rd0 ml"}, /* lbu rd,m */
  {MIR_MOV, "m0 r", "O23 F0 rS1 ms"}, /* sb rs2,m */

#if COMPRESS_INSNS
  {MIR_MOV, "r k", "o1 a2 rd0 k"}, /* c.li rd,k */
#endif
  {MIR_MOV, "r i", "O13 F0 rd0 hs0 i"}, /* addi r,zero,i */
#if COMPRESS_INSNS
  {MIR_MOV, "rp ku", "o1 a3 rd0 ku"}, /* c.lui rd,k */
#endif
  {MIR_MOV, "r iu", "O37 rd0 iu"}, /* lui r,i */
  //  {MIR_MOV, "r ia", "O37 rd0 ih; O13 F0 rd0 rs0 il"}, /* lui r,i; addi r,r,i */
  {MIR_MOV, "r I", "O17 rd0 I; O3 F3 rd0 rs0"}, /* auipc r,rel-caddr; ld r,rel-caddr(r) */

  {MIR_FMOV, "r r", "O53 F0 f10 rd0 rs1 rS1"}, /* fsgnj.s rd,rs1,rs2 */
  {MIR_FMOV, "r mf", "O7 F2 rd0 ml"},          /* flw rd,m */
  {MIR_FMOV, "mf r", "O27 F2 rS1 ms"},         /* fsw rd,m */

  {MIR_DMOV, "r r", "O53 F0 f11 rd0 rs1 rS1"}, /* fsgnj.d rd,rs1,rs2 */
#if COMPRESS_INSNS
  {MIR_DMOV, "C mcd", "o0 a1 rv0 mcd"},   /* c.fld rd',mcd */
  {MIR_DMOV, "r mcds", "o2 a1 rd0 mcds"}, /* c.fldsp rd,mcds */
#endif
  {MIR_DMOV, "r md", "O7 F3 rd0 ml"}, /* fld rd,m */
#if COMPRESS_INSNS
  {MIR_DMOV, "mcd C", "o0 a5 rv1 mcd"},    /* c.fsd rd',mcd */
  {MIR_DMOV, "mcds r", "o2 a5 rt1 mcdss"}, /* c.fsdsp rd,mcdss */
#endif
  {MIR_DMOV, "md r", "O27 F3 rS1 ms"}, /* fsd rd,m */

  /* LD values are always kept in memory.  We place them into int hard regs for passing
     args/returning values (see machinize).  We don't need insn replacement as we split
     load moves in target_translate: */
  {MIR_LDMOV, "r mld", ""}, /* int_reg <- mem */
  {MIR_LDMOV, "mld r", ""}, /* mem <- int_reg */
  /* mem <- mem by using temp fp regs: */
  {MIR_LDMOV, "mld mld", ""},

#define STR(c) #c
#define STR_VAL(c) STR (c)

  {MIR_UNSPEC, "c" STR_VAL (FMVXW_CODE) " r r", "O53 F0 f70 rd1 rs2"}, /* fmv.x.w r0,r1 */
  {MIR_UNSPEC, "c" STR_VAL (FMVXD_CODE) " r r", "O53 F0 f71 rd1 rs2"}, /* fmv.x.d r0,r1 */

  {MIR_EXT8, "r r",
   "O13 F1 rd0 rs1 S38; O13 F5 f20 rd0 rs0 S38"}, /* slli rd,rs1,56;srai rd,rs1,56 */
  {MIR_EXT16, "r r",
   "O13 F1 rd0 rs1 S30; O13 F5 f20 rd0 rs0 S30"}, /* slli rd,rs1,48;srai rd,rs1,48 */
  {MIR_EXT32, "r r", "O1b F0 rd0 rs1 i0"},        /* addiw rd,rs1,0 */

  {MIR_UEXT8, "r r",
   "O13 F1 rd0 rs1 S38; O13 F5 f0 rd0 rs0 S38"}, /* slli rd,rs1,56;srli rd,rs1,56 */
  {MIR_UEXT16, "r r",
   "O13 F1 rd0 rs1 S30; O13 F5 f0 rd0 rs0 S30"}, /* slli rd,rs1,48;srli rd,rs1,48 */
  {MIR_UEXT32, "r r",
   "O13 F1 rd0 rs1 S20; O13 F5 f0 rd0 rs0 S20"}, /* slli rd,rs1,32;srli rd,rs1,32 */

#if COMPRESS_INSNS
  {MIR_ADD, "r 0 r", "o2 b9 rd0 rt2"}, /* c.add rd,rd,rs2 */
#endif
  {MIR_ADD, "r r r", "O33 F0 rd0 rs1 rS2"}, /* add rd,rs1,rs2 */
#if COMPRESS_INSNS
  {MIR_ADD, "h2 0 ks", "o1 a3 rd0 ks"}, /* c.addi16sp sp,sp,i */
  {MIR_ADD, "r 0 kp", "o1 a0 rd0 k"},   /* c.addi rd,rd,i */
  {MIR_ADD, "C h2 kw", "o0 a0 rv0 kw"}, /* c.addi4spn rd',i */
#endif
  {MIR_ADD, "r r i", "O13 F0 rd0 rs1 i"}, /* addi rd,rs1,i */
#if COMPRESS_INSNS
  {MIR_ADDS, "C 0 C", "o1 c27 d1 ru0 rv2"}, /* c.addw rd',rd',rs2' */
#endif
  {MIR_ADDS, "r r r", "O3b F0 rd0 rs1 rS2"}, /* addw rd,rs1,rs2 */
#if COMPRESS_INSNS
  {MIR_ADDS, "r 0 k", "o1 a1 rd0 k"}, /* c.addiw rd,rd,i */
#endif
  {MIR_ADDS, "r r i", "O1b F0 rd0 rs1 i"},      /* addiw rd,rs1,i */
  {MIR_FADD, "r r r", "O53 F7 f0 rd0 rs1 rS2"}, /* fadd.s rd,rs1,rs2 */
  {MIR_DADD, "r r r", "O53 F7 f1 rd0 rs1 rS2"}, /* fadd.d rd,rs1,rs2 */
// ldadd is implemented through builtin

#if COMPRESS_INSNS
  {MIR_SUB, "C 0 C", "o1 c23 d0 ru0 rv2"}, /* c.sub rd',rd',rs2' */
#endif
  {MIR_SUB, "r r r", "O33 F0 f20 rd0 rs1 rS2"}, /* sub rd,rs1,rs2 */
  {MIR_SUB, "r r j", "O13 F0 rd0 rs1 j"},       /* addi rd,rs1,-j */
#if COMPRESS_INSNS
  {MIR_SUBS, "C 0 C", "o1 c27 d0 ru0 rv2"}, /* c.subw rd',rd',rs2' */
#endif
  {MIR_SUBS, "r r r", "O3b F0 f20 rd0 rs1 rS2"}, /* subw rd,rs1,rs2 */
  {MIR_SUBS, "r r j", "O1b F0 rd0 rs1 j"},       /* addiw rd,rs1,-j */
  {MIR_FSUB, "r r r", "O53 F7 f4 rd0 rs1 rS2"},  /* fsub.s rd,rs1,rs2 */
  {MIR_DSUB, "r r r", "O53 F7 f5 rd0 rs1 rS2"},  /* fsub.d rd,rs1,rs2 */
  // ldsub is implemented through builtin

  {MIR_MUL, "r r r", "O33 F0 f1 rd0 rs1 rS2"},  /* mul rd,rs1,rs2 */
  {MIR_MULS, "r r r", "O3b F0 f1 rd0 rs1 rS2"}, /* mulw rd,rs1,rs2 */
  {MIR_FMUL, "r r r", "O53 F7 f8 rd0 rs1 rS2"}, /* fmul.s rd,rs1,rs2*/
  {MIR_DMUL, "r r r", "O53 F7 f9 rd0 rs1 rS2"}, /* fmul.d rd,rs1,rs2*/
  // ldmul is implemented through builtin

  {MIR_DIV, "r r r", "O33 F4 f1 rd0 rs1 rS2"},   /* div rd,rs1,rs2 */
  {MIR_DIVS, "r r r", "O3b F4 f1 rd0 rs1 rS2"},  /* divw rd,rs1,rs2 */
  {MIR_UDIV, "r r r", "O33 F5 f1 rd0 rs1 rS2"},  /* divu rd,rs1,rs2 */
  {MIR_UDIVS, "r r r", "O3b F5 f1 rd0 rs1 rS2"}, /* divuw rd,rs1,rs2 */
  {MIR_FDIV, "r r r", "O53 F7 fc rd0 rs1 rS2"},  /* fdiv.s rd,rs1,rs2*/
  {MIR_DDIV, "r r r", "O53 F7 fd rd0 rs1 rS2"},  /* fdiv.d rd,rs1,rs2*/
  // lddiv is implemented through builtin

  {MIR_MOD, "r r r", "O33 F6 f1 rd0 rs1 rS2"},   /* rem rd,rs1,rs2 */
  {MIR_MODS, "r r r", "O3b F6 f1 rd0 rs1 rS2"},  /* remw rd,rs1,rs2 */
  {MIR_UMOD, "r r r", "O33 F7 f1 rd0 rs1 rS2"},  /* remu rd,rs1,rs2 */
  {MIR_UMODS, "r r r", "O3b F7 f1 rd0 rs1 rS2"}, /* remuw rd,rs1,rs2 */

  {MIR_EQ, "r r r",
   "O33 F0 f20 rd0 rs1 rS2; O13 F3 rd0 rs0 i1"},            /* sub rd,rs1,rs2; sltiu rd,rs1,1 */
  {MIR_EQ, "r r j", "O13 F0 rd0 rs1 j; O13 F3 rd0 rs0 i1"}, /* addi rd,rs1,-j; sltiu rd,rs1,1 */
  {MIR_EQS, "r r r",
   "O3b F0 f20 rd0 rs1 rS2; O13 F3 rd0 rs0 i1"},             /* subw rd,rs1,rs2; sltiu rd,rs1,1 */
  {MIR_EQS, "r r j", "O1b F0 rd0 rs1 j; O13 F3 rd0 rs0 i1"}, /* addiw rd,rs1,-j; sltiu rd,rs1,1 */

  {MIR_NE, "r r r",
   "O33 F0 f20 rd0 rs1 rS2; O33 F3 rd0 hs0 rS0"},            /* sub rd,rs1,rs2; sltu rd,z,rs2 */
  {MIR_NE, "r r j", "O13 F0 rd0 rs1 j; O33 F3 rd0 hs0 rS0"}, /* addi rd,rs1,-j; sltu rd,z,rs2 */
  {MIR_NES, "r r r",
   "O33 F0 f20 rd0 rs1 rS2; O33 F3 rd0 hs0 rS0"},             /* sub rd,rs1,rs2; sltu rd,z,rs2 */
  {MIR_NES, "r r j", "O13 F0 rd0 rs1 j; O33 F3 rd0 hs0 rS0"}, /* addi rd,rs1,-j; sltu rd,z,rs2 */

  {MIR_LT, "r r r", "O33 F2 f0 rd0 rs1 rS2"},   /* slt rd,rs1,rs2 */
  {MIR_LT, "r r i", "O13 F2 f0 rd0 rs1 i"},     /* slti rd,rs1,i */
  {MIR_LTS, "r r r", "O33 F2 f0 rd0 rs1 rS2"},  /* slt rd,rs1,rs2 */
  {MIR_LTS, "r r i", "O13 F2 f0 rd0 rs1 i"},    /* slti rd,rs1,i */
  {MIR_ULT, "r r r", "O33 F3 f0 rd0 rs1 rS2"},  /* sltu rd,rs1,rs2 */
  {MIR_ULT, "r r i", "O13 F3 f0 rd0 rs1 i"},    /* sltiu rd,rs1,i */
  {MIR_ULTS, "r r r", "O33 F3 f0 rd0 rs1 rS2"}, /* sltu rd,rs1,rs2 */
  {MIR_ULTS, "r r i", "O13 F3 f0 rd0 rs1 i"},   /* sltiu rd,rs1,i */

  // ??? le r,imm -> lt r,imm+1
  /* sgt rd,rs1,rs2;xori rd,rs1,1 */
  {MIR_LE, "r r r", "O33 F2 f0 rd0 rs2 rS1; O13 F4 f0 rd0 rs0 i1"},
  /* sgti rd,rs1,i;xori rd,rs1,1 */
  {MIR_LE, "r i r", "O13 F2 f0 rd0 rs2 i; O13 F4 f0 rd0 rs0 i1"},
  /* sgt rd,rs1,rs2;xori rd,rs1,1 */
  {MIR_LES, "r r r", "O33 F2 f0 rd0 rs2 rS1; O13 F4 f0 rd0 rs0 i1"},
  /* sgti rd,rs1,i;xori rd,rs1,1 */
  {MIR_LES, "r i r", "O13 F2 f0 rd0 rs2 i; O13 F4 f0 rd0 rs0 i1"},
  /* sgtu rd,rs1,rs2;xori rd,rs1,1 */
  {MIR_ULE, "r r r", "O33 F3 f0 rd0 rs2 rS1; O13 F4 f0 rd0 rs0 i1"},
  /* sgtui rd,rs1,i;xori rd,rs1,1 */
  {MIR_ULE, "r i r", "O13 F3 f0 rd0 rs2 i; O13 F4 f0 rd0 rs0 i1"},
  /* sgtu rd,rs1,rs2;xori rd,rs1,1 */
  {MIR_ULES, "r r r", "O33 F3 f0 rd0 rs2 rS1; O13 F4 f0 rd0 rs0 i1"},
  /* sgtui rd,rs1,i;xori rd,rs1,1 */
  {MIR_ULES, "r i r", "O13 F3 f0 rd0 rs2 i; O13 F4 f0 rd0 rs0 i1"},

  {MIR_GT, "r r r", "O33 F2 f0 rd0 rs2 rS1"},   /* slt rd,rs1,rs2 */
  {MIR_GT, "r i r", "O13 F2 f0 rd0 rs2 i"},     /* slti rd,rs1,i */
  {MIR_GTS, "r r r", "O33 F2 f0 rd0 rs2 rS1"},  /* slt rd,rs1,rs2 */
  {MIR_GTS, "r i r", "O13 F2 f0 rd0 rs2 i"},    /* slti rd,rs1,i */
  {MIR_UGT, "r r r", "O33 F3 f0 rd0 rs2 rS1"},  /* sltu rd,rs1,rs2 */
  {MIR_UGT, "r i r", "O13 F3 f0 rd0 rs2 i"},    /* sltiu rd,rs1,i */
  {MIR_UGTS, "r r r", "O33 F3 f0 rd0 rs2 rS1"}, /* sltu rd,rs1,rs2 */
  {MIR_UGTS, "r i r", "O13 F3 f0 rd0 rs2 i"},   /* sltiu rd,rs1,i */

  /* slt rd,rs1,rs2;xori rd,rs1,1 */
  {MIR_GE, "r r r", "O33 F2 f0 rd0 rs1 rS2; O13 F4 f0 rd0 rs0 i1"},
  /* slti rd,rs1,i;xori rd,rs1,1 */
  {MIR_GE, "r r i", "O13 F2 f0 rd0 rs1 i; O13 F4 f0 rd0 rs0 i1"},
  /* slt rd,rs1,rs2;xori rd,rs1,1 */
  {MIR_GES, "r r r", "O33 F2 f0 rd0 rs1 rS2; O13 F4 f0 rd0 rs0 i1"},
  /* slti rd,rs1,i;xori rd,rs1,1 */
  {MIR_GES, "r r i", "O13 F2 f0 rd0 rs1 i; O13 F4 f0 rd0 rs0 i1"},
  /* sltu rd,rs1,rs2;xori rd,rs1,1 */
  {MIR_UGE, "r r r", "O33 F3 f0 rd0 rs1 rS2; O13 F4 f0 rd0 rs0 i1"},
  /* sltui rd,rs1,i;xori rd,rs1,1 */
  {MIR_UGE, "r r i", "O13 F3 f0 rd0 rs1 i; O13 F4 f0 rd0 rs0 i1"},
  /* sltu rd,rs1,rs2;xori rd,rs1,1 */
  {MIR_UGES, "r r r", "O33 F3 f0 rd0 rs1 rS2; O13 F4 f0 rd0 rs0 i1"},
  /* sltui rd,rs1,i;xori rd,rs1,1 */
  {MIR_UGES, "r r i", "O13 F3 f0 rd0 rs1 i; O13 F4 f0 rd0 rs0 i1"},

  {MIR_FEQ, "r r r", "O53 F2 f50 rd0 rs1 rS2"}, /* feq.s rd,rs1,rs2 */
  {MIR_DEQ, "r r r", "O53 F2 f51 rd0 rs1 rS2"}, /* feq.d rd,rs1,rs2 */
  {MIR_FNE, "r r r",
   "O53 F2 f50 rd0 rs1 rS2; O13 F4 rd0 rs0 i1"}, /* feq.s rd,rs1,rs2; xori rd,r1,1 */
  {MIR_DNE, "r r r",
   "O53 F2 f51 rd0 rs1 rS2; O13 F4 rd0 rs0 i1"}, /* feq.d rd,rs1,rs2;xori rd,rs1,1 */
  {MIR_FLT, "r r r", "O53 F1 f50 rd0 rs1 rS2"},  /* flt.s rd,rs1,rs2 */
  {MIR_DLT, "r r r", "O53 F1 f51 rd0 rs1 rS2"},  /* flt.d rd,rs1,rs2 */
  {MIR_FLE, "r r r", "O53 F0 f50 rd0 rs1 rS2"},  /* fle.s rd,rs1,rs2 */
  {MIR_DLE, "r r r", "O53 F0 f51 rd0 rs1 rS2"},  /* fle.d rd,rs1,rs2 */
  {MIR_FGT, "r r r", "O53 F1 f50 rd0 rs2 rS1"},  /* flt.s rd,rs1,rs2 */
  {MIR_DGT, "r r r", "O53 F1 f51 rd0 rs2 rS1"},  /* flt.d rd,rs1,rs2 */
  {MIR_FGE, "r r r", "O53 F0 f50 rd0 rs2 rS1"},  /* fle.s rd,rs1,rs2 */
  {MIR_DGE, "r r r", "O53 F0 f51 rd0 rs2 rS1"},  /* fle.d rd,rs1,rs2 */

  {MIR_JMP, "L", "O6f hd0 L"}, /* jal: 20-bit offset (w/o 1 bit) jmp */

  {MIR_LADDR, "r U", "O17 rd0 U; O13 F0 rd0 rs0"}, /* auipc r,hi(l);addi r,r,low(L) */
  {MIR_JMPI, "r", "O67 F0 hd0 rs0 i0"},            /* jmp *r: jalr zero,r,0 */

  {MIR_BT, "l r", "O63 F1 rs1 hS0 l"},  /* bne rs1,zero,l */
  {MIR_BTS, "l r", "O63 F1 rs1 hS0 l"}, /* bne rs1,zero,l */
  {MIR_BF, "l r", "O63 F0 rs1 hS0 l"},  /* beq rs1,zero,l */
  {MIR_BFS, "l r", "O63 F0 rs1 hS0 l"}, /* beq rs1,zero,l */

  {MIR_BEQ, "l r r", "O63 F0 rs1 rS2 l"},  /* beq rs1,rs2,l */
  {MIR_BEQS, "l r r", "O63 F0 rs1 rS2 l"}, /* beq rs1,rs2,l */

  {MIR_BNE, "l r r", "O63 F1 rs1 rS2 l"},  /* bne rs1,rs2,l */
  {MIR_BNES, "l r r", "O63 F1 rs1 rS2 l"}, /* bne rs1,rs2,l */

  {MIR_BLT, "l r r", "O63 F4 rs1 rS2 l"},   /* blt rs1,rs2,l */
  {MIR_BLTS, "l r r", "O63 F4 rs1 rS2 l"},  /* blt rs1,rs2,l */
  {MIR_UBLT, "l r r", "O63 F6 rs1 rS2 l"},  /* bltu rs1,rs2,l */
  {MIR_UBLTS, "l r r", "O63 F6 rs1 rS2 l"}, /* bltu rs1,rs2,l */

  {MIR_BGE, "l r r", "O63 F5 rs1 rS2 l"},   /* bge rs1,rs2,l */
  {MIR_BGES, "l r r", "O63 F5 rs1 rS2 l"},  /* bge rs1,rs2,l */
  {MIR_UBGE, "l r r", "O63 F7 rs1 rS2 l"},  /* bgeu rs1,rs2,l */
  {MIR_UBGES, "l r r", "O63 F7 rs1 rS2 l"}, /* bgeu rs1,rs2,l */

  {MIR_BGT, "l r r", "O63 F4 rs2 rS1 l"},   /* blt rs1,rs2,l */
  {MIR_BGTS, "l r r", "O63 F4 rs2 rS1 l"},  /* blt rs1,rs2,l */
  {MIR_UBGT, "l r r", "O63 F6 rs2 rS1 l"},  /* bltu rs1,rs2,l */
  {MIR_UBGTS, "l r r", "O63 F6 rs2 rS1 l"}, /* bltu rs1,rs2,l */

  {MIR_BLE, "l r r", "O63 F5 rs2 rS1 l"},   /* bge rs1,rs2,l */
  {MIR_BLES, "l r r", "O63 F5 rs2 rS1 l"},  /* bge rs1,rs2,l */
  {MIR_UBLE, "l r r", "O63 F7 rs2 rS1 l"},  /* bgeu rs1,rs2,l */
  {MIR_UBLES, "l r r", "O63 F7 rs2 rS1 l"}, /* bgeu rs1,rs2,l */
  // there are no FBx,DBx,LDBx as they are machinized into compare and BT

  {MIR_NEG, "r r", "O33 F0 f20 rd0 hs0 rS1"},  /* sub rd,z,rs2 */
  {MIR_NEGS, "r r", "O3b F0 f20 rd0 hs0 rS1"}, /* subw rd,z,rs2 */
  {MIR_FNEG, "r r", "O53 F1 f10 rd0 rs1 rS1"}, /* fsgnjn.s rd,rs1,rs2 */
  {MIR_DNEG, "r r", "O53 F1 f11 rd0 rs1 rS1"}, /* fsgnjn.d rd,rs1,rs2 */
  // ldneg is a builtin

  {MIR_LSH, "r r r", "O33 F1 f0 rd0 rs1 rS2"},  /* sll rd,rs1,rs2 */
  {MIR_LSHS, "r r r", "O3b F1 f0 rd0 rs1 rS2"}, /* sllw rd,rs1,rs2 */
#if COMPRESS_INSNS
  {MIR_LSH, "r 0 Sp", "o2 a0 rd0 Sp"}, /* c.slli rd,rd,sh */
#endif
  {MIR_LSH, "r r S", "O13 F1 f0 rd0 rs1 S"},  /* slli rd,rs1,sh */
  {MIR_LSHS, "r r s", "O1b F1 f0 rd0 rs1 s"}, /* slliw rd,rs1,sh */

  {MIR_RSH, "r r r", "O33 F5 f20 rd0 rs1 rS2"},  /* sra rd,rs1,rs2 */
  {MIR_RSHS, "r r r", "O3b F5 f20 rd0 rs1 rS2"}, /* sraw rd,rs1,rs2 */
#if COMPRESS_INSNS
  {MIR_RSH, "C 0 Sp", "o1 a4 e1 ru0 Sp"}, /* c.srai rd',rd',sh */
#endif
  {MIR_RSH, "r r S", "O13 F5 f20 rd0 rs1 S"},  /* srai rd,rs1,sh */
  {MIR_RSHS, "r r s", "O1b F5 f20 rd0 rs1 s"}, /* sraiw rd,rs1,sh */

  {MIR_URSH, "r r r", "O33 F5 f0 rd0 rs1 rS2"},  /* srl rd,rs1,rs2 */
  {MIR_URSHS, "r r r", "O3b F5 f0 rd0 rs1 rS2"}, /* srlw rd,rs1,rs2 */
#if COMPRESS_INSNS
  {MIR_URSH, "C 0 Sp", "o1 a4 e0 ru0 Sp"}, /* c.srli rd',rd',sh */
#endif
  {MIR_URSH, "r r S", "O13 F5 f0 rd0 rs1 S"},  /* srli rd,rs1,rs2 */
  {MIR_URSHS, "r r s", "O1b F5 f0 rd0 rs1 s"}, /* srliw rd,rs1,sh */

#if COMPRESS_INSNS
  {MIR_AND, "C 0 C", "o1 c23 d3 ru0 rv2"}, /* c.and rd',rd',rs2' */
#endif
  {MIR_AND, "r r r", "O33 F7 f0 rd0 rs1 rS2"}, /* and rd,rs1,rs2 */
#if COMPRESS_INSNS
  {MIR_AND, "C 0 k", "o1 a4 e2 ru0 k"}, /* c.andi rd',rd',i */
#endif
  {MIR_AND, "r r i", "O13 F7 f0 rd0 rs1 i"}, /* andi rd,rs1,i */
#if COMPRESS_INSNS
  {MIR_ANDS, "C 0 C", "o1 c23 d3 ru0 rv2"}, /* c.and rd',rd',rs2' */
#endif
  {MIR_ANDS, "r r r", "O33 F7 f0 rd0 rs1 rS2"}, /* and rd,rs1,rs2 */
#if COMPRESS_INSNS
  {MIR_ANDS, "C 0 k", "o1 a4 e2 ru0 k"}, /* c.andi rd',rd',i */
#endif
  {MIR_ANDS, "r r i", "O13 F7 f0 rd0 rs1 i"}, /* andi rd,rs1,i */

#if COMPRESS_INSNS
  {MIR_OR, "C 0 C", "o1 c23 d2 ru0 rv2"}, /* c.or rd',rd',rs2' */
#endif
  {MIR_OR, "r r r", "O33 F6 f0 rd0 rs1 rS2"}, /* or rd,rs1,rs2 */
  {MIR_OR, "r r i", "O13 F6 f0 rd0 rs1 i"},   /* ori rd,rs1,i */
#if COMPRESS_INSNS
  {MIR_ORS, "C 0 C", "o1 c23 d2 ru0 rv2"}, /* c.or rd',rd',rs2' */
#endif
  {MIR_ORS, "r r r", "O33 F6 f0 rd0 rs1 rS2"}, /* or rd,rs1,rs2 */
  {MIR_ORS, "r r i", "O13 F6 f0 rd0 rs1 i"},   /* ori rd,rs1,i */

#if COMPRESS_INSNS
  {MIR_XOR, "C 0 C", "o1 c23 d1 ru0 rv2"}, /* c.xor rd',rd',rs2' */
#endif
  {MIR_XOR, "r r r", "O33 F4 f0 rd0 rs1 rS2"}, /* xor rd,rs1,rs2 */
  {MIR_XOR, "r r i", "O13 F4 f0 rd0 rs1 i"},   /* xori rd,rs1,i */
#if COMPRESS_INSNS
  {MIR_XORS, "C 0 C", "o1 c23 d1 ru0 rv2"}, /* c.xor rd',rd',rs2' */
#endif
  {MIR_XORS, "r r r", "O33 F4 f0 rd0 rs1 rS2"}, /* xor rd,rs1,rs2 */
  {MIR_XORS, "r r i", "O13 F4 f0 rd0 rs1 i"},   /* xori rd,rs1,i */

  {MIR_I2F, "r r", "O53 F7 f68 hS2 rd0 rs1"},  /* fcvt.s.l rd,rs1 */
  {MIR_I2D, "r r", "O53 F7 f69 hS2 rd0 rs1"},  /* fcvt.d.l rd,rs1 */
  {MIR_UI2F, "r r", "O53 F7 f68 hS3 rd0 rs1"}, /* fcvt.s.lu rd,rs1 */
  {MIR_UI2D, "r r", "O53 F7 f69 hS3 rd0 rs1"}, /* fcvt.d.lu rd,rs1 */

  {MIR_F2I, "r r", "O53 F1 f60 hS2 rd0 rs1"}, /* fcvt.l.s rd,rs1,rtz */
  {MIR_D2I, "r r", "O53 F1 f61 hS2 rd0 rs1"}, /* fcvt.l.d rd,rs1,rtz */
  {MIR_F2D, "r r", "O53 F0 f21 hS0 rd0 rs1"}, /* fcvt.d.s rd,rs1 -- never round */
  {MIR_D2F, "r r", "O53 F7 f20 hS1 rd0 rs1"}, /* fcvt.s.d rd,rs1 */
// i2ld, ui2ld, ld2i, f2ld, d2ld, ld2f, ld2d are builtins

#if COMPRESS_INSNS
  {MIR_CALL, "X r $", "o2 b9 rd1"},   /* c.jalr rd */
  {MIR_INLINE, "X r $", "o2 b9 rd1"}, /* c.jalr rd */
  {MIR_RET, "$", "o2 b8 hd1"},        /* c.jr ra  */
#endif
  {MIR_CALL, "X r $", "O67 F0 hd1 rs1 i0"},   /* jalr rd,rs1 */
  {MIR_INLINE, "X r $", "O67 F0 hd1 rs1 i0"}, /* jalr rd,rs1 */
  {MIR_RET, "$", "O67 F0 hd0 hs1 i0"},        /* jalr hr0,hr1,0  */

  {MIR_JCALL, "X r $", "O67 F0 hd0 rs1 i0"}, /* jmp *r: jalr zero,r,0 */
  {MIR_JRET, "r $", "O67 F0 hd0 rs0 i0"},    /* jmp *r: jalr zero,r,0 */

#if COMPRESS_INSNS
  /* addi r0,r0,15; andi r0,r0,-16; c.sub sp,sp,r0; c.mov r0,sp: */
  {MIR_ALLOCA, "C 0",
   "o1 a0 rd0 kf; o1 a4 e2 ru0 k-10;"        /* c.addi r0,r0,15; c.andi r0,r0,-16 */
   "O33 F0 f20 hd2 hs2 rS0; o2 b8 rd0 ht2"}, /* sub sp,sp,r0; c.mv r0,sp */
  /* addi r0,r1,15; c.andi r0,r0,-16; c.sub sp,sp,r0; c.mov r0,sp: */
  {MIR_ALLOCA, "C r",
   "O13 F0 rd0 rs1 if; o1 a4 e2 ru0 k-10;"   /* addi r0,r1,15; c.andi r0,r0,-16 */
   "O33 F0 f20 hd2 hs2 rS0; o2 b8 rd0 ht2"}, /* sub sp,sp,r0; c.mv r0,sp */
  /* addi r0,r1,15; andi r0,r0,-16; sub sp,sp,r0; c.mov r0,sp: */
  {MIR_ALLOCA, "r r",
   "O13 F0 rd0 rs1 if; O13 F7 f0 rd0 rs0 i-10;" /* addi r0,r1,15; andi r0,r0,-16 */
   "O33 F0 f20 hd2 hs2 rS0; o2 b8 rd0 ht2"},    /* sub sp,sp,r0; c.mv r0,sp */
  /* c.addi16sp sp,sp,-roundup(imm,16); c.mv r0,sp: */
  {MIR_ALLOCA, "r jus", "o1 a3 hd2 jus; o2 b8 rd0 ht2"},
  /* addi sp,sp,-roundup(imm,16); c.mv r0,sp: */
  {MIR_ALLOCA, "r ju", "O13 F0 hd2 hs2 ju; o2 b8 rd0 ht2"},
#else
  /* addi r0,r1,15; andi r0,r0,-16; sub sp,sp,r0; mov r0,sp: */
  {MIR_ALLOCA, "r r",
   "O13 F0 rd0 rs1 if; O13 F7 f0 rd0 rs0 i-10;"  /* addi r0,r1,15; andi r0,r0,-16 */
   "O33 F0 f20 hd2 hs2 rS0; O13 F0 rd0 hs2 i0"}, /* sub sp,sp,r0; addi r0,sp,0 */
  /* addi sp,sp,-roundup(imm,16); c.mv r0,sp: */
  {MIR_ALLOCA, "r ju", "O13 F0 hd2 hs2 ju; O13 F0 rd0 hs2 i0"},
#endif

#if COMPRESS_INSNS
  {MIR_BSTART, "r", "o2 b8 rd0 ht2"}, /* r = sp: c.mv rd,rs2 */
  {MIR_BEND, "r", "o2 b8 hd2 rt0"},   /* sp = r: c.mv rd,rs2 */
#else
  {MIR_BSTART, "r", "O13 F0 rd0 hs2 i0"}, /* r = sp: addi rd,rs1,0 */
  {MIR_BEND, "r", "O13 F0 hd2 rs0 i0"},   /* sp = r: addi rd,rs1,0 */
#endif
  /* slli t5,r,3; auipc t6,0; add t6,t6,t5;ld t6,T(t6);jalr zero,t6,0;
     8-byte aligned TableContent.  Remember r can be t5 can be if switch operand is memory. */
  {MIR_SWITCH, "r $",
   "O13 F1 hd1e rs0 S3; O17 hd1f iu0; O33 F0 hd1f hs1f hS1e; O3 F3 hd1f hs1f T; O67 F0 hd0 hs1f "
   "i0"},

};

/* Report through *HR1 and *HR2 the hard registers INSN clobbers early.  *HR2 is always set to
   MIR_NON_VAR.  *HR1 is R8_HARD_REG for the integer remainder insns (MOD, MODS, UMOD, UMODS)
   and MIR_NON_VAR for any other insn.  */
static void target_get_early_clobbered_hard_regs (MIR_insn_t insn, MIR_reg_t *hr1, MIR_reg_t *hr2) {
  *hr2 = MIR_NON_VAR;
  switch (insn->code) {
  case MIR_MOD:
  case MIR_MODS:
  case MIR_UMOD:
  case MIR_UMODS: *hr1 = R8_HARD_REG; break;
  default: *hr1 = MIR_NON_VAR; break;
  }
}

/* qsort comparison function for indexes into the patterns table.  Indexes are ordered by the insn
   code of the pattern they refer to; indexes of patterns with the same code keep their table
   order.  */
static int pattern_index_cmp (const void *a1, const void *a2) {
  const int first = *(const int *) a1, second = *(const int *) a2;
  const int first_code = (int) patterns[first].code;
  const int second_code = (int) patterns[second].code;

  if (first_code != second_code) return first_code - second_code;
  return (int) ((long) first - (long) second);
}

/* Build the pattern lookup data: pattern_indexes gets all indexes of the patterns table sorted by
   insn code (see pattern_index_cmp), and insn_pattern_info gets, for every insn code, the start
   position and the number of its patterns in pattern_indexes.  Codes without patterns keep the
   zero-initialized {0, 0} entry.  */
static void patterns_init (gen_ctx_t gen_ctx) {
  const int patterns_num = sizeof (patterns) / sizeof (struct pattern);
  insn_pattern_info_t empty_info = {0, 0};
  insn_pattern_info_t *infos;
  MIR_insn_code_t group_code = MIR_INSN_BOUND; /* code of the group being scanned */
  int pos;

  VARR_CREATE (int, pattern_indexes, 0);
  for (pos = 0; pos < patterns_num; pos++) VARR_PUSH (int, pattern_indexes, pos);
  qsort (VARR_ADDR (int, pattern_indexes), patterns_num, sizeof (int), pattern_index_cmp);
  VARR_CREATE (insn_pattern_info_t, insn_pattern_info, 0);
  for (pos = 0; pos < MIR_INSN_BOUND; pos++)
    VARR_PUSH (insn_pattern_info_t, insn_pattern_info, empty_info);
  infos = VARR_ADDR (insn_pattern_info_t, insn_pattern_info);
  for (pos = 0; pos < patterns_num; pos++) {
    MIR_insn_code_t code = patterns[VARR_GET (int, pattern_indexes, pos)].code;

    if (code == group_code) continue;
    /* A new group starts here: close the previous one (if any) and open the new one.  */
    if (pos != 0) infos[group_code].num = pos - infos[group_code].start;
    infos[code].start = pos;
    group_code = code;
  }
  assert (group_code != MIR_INSN_BOUND);
  /* Close the last group.  */
  infos[group_code].num = patterns_num - infos[group_code].start;
}

/* Return the value of decimal digit CH, or -1 if CH is not a decimal digit.  */
static int dec_value (int ch) {
  if (ch < '0' || ch > '9') return -1;
  return ch - '0';
}

/* Read a non-empty sequence of decimal digits starting at *PTR and return its value.  On return
   *PTR points to the last digit read (not past it).  It is asserted that at least one digit is
   present and that the value accumulated before each digit is less than 2^60.  */
static uint64_t read_dec (const char **ptr) {
  const char *start = *ptr, *curr = start;
  uint64_t value = 0;
  int digit;

  while ((digit = dec_value (*curr)) >= 0) {
    gen_assert ((value >> 60) == 0);
    value = value * 10 + digit;
    curr++;
  }
  gen_assert (curr != start);
  *ptr = curr - 1;
  return value;
}

/* Return TRUE if hard register REG is in the range R8..R15 or, unless INT_ONLY_P, in the range
   F8..F15.  The 'C' operand patterns and the compressed memory patterns use this check.  */
static int compressed_reg_p (MIR_reg_t reg, int int_only_p) {
  int int_reg_p = R8_HARD_REG <= reg && reg <= R15_HARD_REG;
  int fp_reg_p = F8_HARD_REG <= reg && reg <= F15_HARD_REG;

  return int_reg_p || (!int_only_p && fp_reg_p) ? TRUE : FALSE;
}

/* Return TRUE if the operands of INSN match the operand pattern string of PAT, FALSE otherwise.
   The pattern string is scanned left to right; each white-space separated element is checked
   against the insn operand with the same ordinal number.  The element '$' stops the scan and
   accepts whatever operands remain.  An unknown element letter is an internal error
   (gen_assert).  */
static int pattern_match_p (gen_ctx_t gen_ctx, const struct pattern *pat, MIR_insn_t insn) {
  MIR_context_t ctx = gen_ctx->ctx;
  int n;
  size_t nop, nops = MIR_insn_nops (ctx, insn);
  const char *p;
  char ch, start_ch;
  MIR_op_t op, original;
  MIR_op_mode_t mode;

  /* Each case below must leave P on the last character of its element: the loop increment
     then moves P past it.  That is why cases which look ahead one character step back (p--)
     when the character is not a modifier.  */
  for (nop = 0, p = pat->pattern; *p != 0; p++, nop++) {
    while (*p == ' ' || *p == '\t') p++;
    if (*p == '$') return TRUE;
    /* A call insn with fewer operands than the pattern requires simply does not match.  */
    if (MIR_call_code_p (insn->code) && nop >= nops) return FALSE;
    gen_assert (nop < nops);
    op = insn->ops[nop];
    switch (start_ch = *p) {
    case 'X': break; /* any operand is accepted */
    case 'r':
      /* r: a var operand other than R0_HARD_REG; rp: additionally not SP_HARD_REG.  */
      ch = *++p;
      if (ch != 'p') {
        p--;
        if (op.mode != MIR_OP_VAR || op.u.var == R0_HARD_REG) return FALSE;
      } else {
        if (op.mode != MIR_OP_VAR || op.u.var == R0_HARD_REG || op.u.var == SP_HARD_REG)
          return FALSE;
      }
      break;
    case 'h': {
      /* h<dec>: a var operand whose number equals the given decimal number.  */
      uint64_t num;
      p++;
      num = read_dec (&p);
      if (op.mode != MIR_OP_VAR || op.u.var != num) return FALSE;
      break;
    }
    case 'C':
      /* A var operand accepted by compressed_reg_p (int or fp register).  */
      if (op.mode != MIR_OP_VAR || !compressed_reg_p (op.u.var, FALSE)) return FALSE;
      break;
    case 'c': {
      /* c<dec>: an integer operand equal to the given decimal number.  */
      uint64_t num;
      p++;
      num = read_dec (&p);
      if ((op.mode != MIR_OP_INT && op.mode != MIR_OP_UINT) || op.u.u != num) return FALSE;
      break;
    }
    case 'm': {
      /* Memory operand: m[c](f|d|ld|[u|s]<0-3>)[s].  TYPE, TYPE2 and TYPE3 are the memory types
         accepted by the element (MIR_T_BOUND means "unused"); SCALE is the access size in
         bytes.  */
      MIR_type_t type, type2, type3 = MIR_T_BOUND;
      int scale, u_p, s_p, compressed_p = FALSE;

      if (op.mode != MIR_OP_VAR_MEM) return FALSE;
      u_p = s_p = TRUE;
      ch = *++p;
      if (ch == 'c') { /* memory of a compressed (16-bit) insn */
        compressed_p = TRUE;
        ch = *++p;
      }
      switch (ch) {
      case 'f':
        gen_assert (!compressed_p);
        type = MIR_T_F;
        type2 = MIR_T_BOUND;
        scale = 4;
        break;
      case 'd':
        type = MIR_T_D;
        type2 = MIR_T_BOUND;
        scale = 8;
        break;
      case 'l':
        ch = *++p;
        gen_assert (ch == 'd' && !compressed_p);
        type = MIR_T_LD;
        type2 = MIR_T_BOUND;
        scale = 16;
        break;
      case 'u':
        gen_assert (!compressed_p);
        /* fall through */
      case 's':
        /* Explicit signedness: exactly one of U_P and S_P stays set.  */
        u_p = ch == 'u';
        s_p = ch == 's';
        ch = *++p;
        /* fall through */
      default:
        /* CH is the log2 of the integer access size.  Without a signedness prefix both U_P and
           S_P are set and the signed type is accepted too (through TYPE2).  */
        gen_assert ('0' <= ch && ch <= '3');
        gen_assert (!compressed_p || '2' <= ch);
        scale = 1 << (ch - '0');
        if (ch == '0') {
          type = u_p ? MIR_T_U8 : MIR_T_I8;
          type2 = u_p && s_p ? MIR_T_I8 : MIR_T_BOUND;
        } else if (ch == '1') {
          type = u_p ? MIR_T_U16 : MIR_T_I16;
          type2 = u_p && s_p ? MIR_T_I16 : MIR_T_BOUND;
        } else if (ch == '2') {
          type = u_p ? MIR_T_U32 : MIR_T_I32;
          type2 = u_p && s_p ? MIR_T_I32 : MIR_T_BOUND;
#if MIR_PTR32
          if (u_p) type3 = MIR_T_P;
#endif
        } else {
          type = u_p ? MIR_T_U64 : MIR_T_I64;
          type2 = u_p && s_p ? MIR_T_I64 : MIR_T_BOUND;
#if MIR_PTR64
          type3 = MIR_T_P;
#endif
        }
      }
      if (op.u.var_mem.type != type && op.u.var_mem.type != type2 && op.u.var_mem.type != type3)
        return FALSE;
      /* No index register is allowed and the displacement must be in [-2048, 2047]; for long
         double the displacement plus 8 must also stay below 2048.  */
      if (op.u.var_mem.index != MIR_NON_VAR || op.u.var_mem.disp < -(1 << 11)
          || op.u.var_mem.disp >= (1 << 11)
          || (type == MIR_T_LD && op.u.var_mem.disp + 8 >= (1 << 11)))
        return FALSE;
      if (compressed_p) {
        /* Compressed forms need a non-negative displacement which is a multiple of SCALE.  */
        if (op.u.var_mem.disp < 0 || op.u.var_mem.disp % scale != 0) return FALSE;
        ch = *++p;
        if (ch == 's') { /* sp-based form: scaled displacement below 2^6 */
          if (op.u.var_mem.base != SP_HARD_REG) return FALSE;
          if (op.u.var_mem.disp / scale >= (1 << 6)) return FALSE;
        } else { /* base must be a compressed int register: scaled displacement below 2^5 */
          p--;
          if (!compressed_reg_p (op.u.var_mem.base, TRUE)) return FALSE;
          if (op.u.var_mem.disp / scale >= (1 << 5)) return FALSE;
        }
      }
      break;
    }
    case 'i': {
      /* i: integer in [-2048, 2047]; iu: 32-bit signed integer with zero low 12 bits;
         ia: 32-bit signed integer or a reference whose value fits into 32 bits signed.  */
      ch = *++p;
      if (op.mode != MIR_OP_INT && op.mode != MIR_OP_UINT && (ch != 'a' || op.mode != MIR_OP_REF))
        return FALSE;
      if ((ch == 'u' || ch == 'a') && (op.mode == MIR_OP_INT || op.mode == MIR_OP_UINT)) {
        assert (nop == 1);
        if (op.u.i < -(1l << 31) || op.u.i >= (1l << 31)) return FALSE;
        if (ch == 'u' && (op.u.i & 0xfff) != 0) return FALSE;
      } else if (ch == 'a' && op.mode == MIR_OP_REF) {
        int64_t v;

        if (op.u.ref->item_type == MIR_data_item && op.u.ref->u.data->name != NULL
            && _MIR_reserved_ref_name_p (ctx, op.u.ref->u.data->name)) {
          v = (int64_t) op.u.ref->u.data->u.els;
        } else {
          v = (int64_t) op.u.ref->addr;
        }
        if (v < -(1l << 31) || v >= (1l << 31)) return FALSE;
      } else {
        assert (nop == 1 || nop == 2);
        p--;
        if (op.u.i < -(1 << 11) || op.u.i >= (1 << 11)) return FALSE;
      }
      break;
    }
    case 'j':
      /* j: integer strictly between -2048 and 2048; ju: the same check for the value rounded
         by (i + 15) / 16 * 16; jus: the rounded value must be non-zero and strictly between
         -512 and 512.  */
      if (op.mode != MIR_OP_INT && op.mode != MIR_OP_UINT) return FALSE;
      int64_t i = op.u.i;
      ch = *++p;
      if (ch == 'u') {
        assert (nop == 1);
        i = (i + 15) / 16 * 16;
        ch = *++p;
        if (ch != 's') {
          p--;
          if (i <= -(1 << 11) || i >= (1 << 11)) return FALSE;
        } else {
          if (i == 0 || i <= -(1 << 9) || i >= (1 << 9)) return FALSE;
        }
      } else {
        p--;
        assert (nop == 2);
        if (i <= -(1 << 11) || i >= (1 << 11)) return FALSE;
      }
      break;
    case 'I': {
      /* Any integer or reference operand.  */
      if (op.mode != MIR_OP_INT && op.mode != MIR_OP_UINT && op.mode != MIR_OP_REF) return FALSE;
      break;
    }
    case 's':
    case 'S': {
      /* Shift amount: s accepts 0..31, S accepts 0..63, Sp accepts 1..63.  */
      assert (nop == 2);
      if (op.mode != MIR_OP_INT && op.mode != MIR_OP_UINT) return FALSE;
      if (op.u.i < 0 || (start_ch == 's' && op.u.i > 31) || (start_ch == 'S' && op.u.i > 63))
        return FALSE;
      if (start_ch == 'S') {
        ch = *++p;
        if (ch != 'p') {
          p--;
        } else {
          if (op.u.i == 0) return FALSE;
        }
      }
      break;
    }
    case 'k':
      /* Immediates of the 16-bit insns:
         k  -- integer in [-32, 31];
         kp -- non-zero integer in [-32, 31];
         ks -- non-zero multiple of 16 in [-512, 511];
         kw -- positive multiple of 4 with value / 4 below 256;
         ku -- non-zero integer with zero low 12 bits which is unchanged by sign extension
               from 18 bits.  */
      if (op.mode != MIR_OP_INT && op.mode != MIR_OP_UINT) return FALSE;
      assert (nop == 1 || nop == 2);
      ch = *++p;
      if (ch == 'p') {
        if (op.u.i == 0 || op.u.i < -(1 << 5) || op.u.i >= (1 << 5)) return FALSE;
      } else if (ch == 's') {
        if (op.u.i == 0 || op.u.i % 16 != 0 || op.u.i < -(1 << 9) || op.u.i >= (1 << 9))
          return FALSE;
      } else if (ch == 'w') {
        if (op.u.i <= 0 || op.u.i % 4 != 0 || op.u.i / 4 >= (1 << 8)) return FALSE;
      } else if (ch != 'u') {
        p--;
        if (op.u.i < -(1 << 5) || op.u.i >= (1 << 5)) return FALSE;
      } else {
        if (op.u.i == 0 || (op.u.i & 0xfff) != 0 || (((int64_t) op.u.i << 46) >> 46) != op.u.i)
          return FALSE;
      }
      break;
    case 'l':
    case 'L':
    case 'U':
      /* Label operand; the offset range is not checked here.  */
      if (op.mode != MIR_OP_LABEL) return FALSE;
      break;
    case '0':
    case '1':
    case '2':
      /* The operand must be the same as the earlier operand with the given number.  UINT and
         INT operand modes are treated as the same mode for this comparison.  */
      n = start_ch - '0';
      gen_assert (n < (int) nop);
      original = insn->ops[n];
      mode = op.mode;
      if (mode == MIR_OP_UINT) mode = MIR_OP_INT;
      if (original.mode != mode && (original.mode != MIR_OP_UINT || mode != MIR_OP_INT))
        return FALSE;
      gen_assert (mode == MIR_OP_VAR || mode == MIR_OP_INT || mode == MIR_OP_FLOAT
                  || mode == MIR_OP_DOUBLE || mode == MIR_OP_LDOUBLE || mode == MIR_OP_VAR_MEM
                  || mode == MIR_OP_LABEL);
      if (mode == MIR_OP_VAR && op.u.var != original.u.var)
        return FALSE;
      else if (mode == MIR_OP_INT && op.u.i != original.u.i)
        return FALSE;
      else if (mode == MIR_OP_FLOAT && op.u.f != original.u.f)
        return FALSE;
      else if (mode == MIR_OP_DOUBLE && op.u.d != original.u.d)
        return FALSE;
      else if (mode == MIR_OP_LDOUBLE && op.u.ld != original.u.ld)
        return FALSE;
      else if (mode == MIR_OP_LABEL && op.u.label != original.u.label)
        return FALSE;
      else if (mode == MIR_OP_VAR_MEM
               && (op.u.var_mem.type != original.u.var_mem.type
                   || op.u.var_mem.scale != original.u.var_mem.scale
                   || op.u.var_mem.base != original.u.var_mem.base
                   || op.u.var_mem.index != original.u.var_mem.index
                   || op.u.var_mem.disp != original.u.var_mem.disp))
        return FALSE;
      break;
    default: gen_assert (FALSE);
    }
  }
  /* Without '$' the pattern must describe all insn operands.  */
  gen_assert (nop == nops);
  return TRUE;
}

/* Return the replacement string of the first pattern (in the sorted order built by patterns_init)
   which has the code of INSN and matches its operands, or NULL if there is no such pattern.  */
static const char *find_insn_pattern_replacement (gen_ctx_t gen_ctx, MIR_insn_t insn) {
  insn_pattern_info_t info = VARR_GET (insn_pattern_info_t, insn_pattern_info, insn->code);
  int k = 0;

  while (k < info.num) {
    const struct pattern *cand = &patterns[VARR_GET (int, pattern_indexes, info.start + k)];

    if (pattern_match_p (gen_ctx, cand, insn)) return cand->replacement;
    k++;
  }
  return NULL;
}

/* Release the pattern lookup arrays created by patterns_init.  */
static void patterns_finish (gen_ctx_t gen_ctx) {
  /* Sorted pattern indexes.  */
  VARR_DESTROY (int, pattern_indexes);
  /* Per insn code start/num info.  */
  VARR_DESTROY (insn_pattern_info_t, insn_pattern_info);
}

/* Return the value of hexadecimal digit CH (upper or lower case), or -1 if CH is not a
   hexadecimal digit.  */
static int hex_value (int ch) {
  if ('0' <= ch && ch <= '9') return ch - '0';
  if ('A' <= ch && ch <= 'F') return ch - 'A' + 10;
  if ('a' <= ch && ch <= 'f') return ch - 'a' + 10;
  return -1;
}

/* Read a non-empty sequence of hexadecimal digits starting at *PTR and return its value.  On
   return *PTR points to the last digit read (not past it).  It is asserted that at least one
   digit is present and that the value accumulated before each digit is less than 2^60.  */
static uint64_t read_hex (const char **ptr) {
  const char *start = *ptr, *curr = start;
  uint64_t value = 0;
  int digit;

  while ((digit = hex_value (*curr)) >= 0) {
    gen_assert ((value >> 60) == 0);
    value = value * 16 + digit;
    curr++;
  }
  gen_assert (curr != start);
  *ptr = curr - 1;
  return value;
}

/* Append BYTE to the generated code buffer (result_code).  */
static void put_byte (struct gen_ctx *gen_ctx, int byte) {
  VARR_PUSH (uint8_t, result_code, byte);
}

/* Append the NB low bytes of V to the generated code, least significant byte first (little
   endian).  Nothing is appended when NB is not positive.  */
static void put_uint64 (struct gen_ctx *gen_ctx, uint64_t v, int nb) {
  int k;

  for (k = 0; k < nb; k++, v >>= 8) put_byte (gen_ctx, v & 0xff);
}

/* Store the NB low bytes of V at ADDR, least significant byte first (little endian).  Nothing is
   written when NB is not positive.  */
static void set_int64 (uint8_t *addr, int64_t v, int nb) {
  int k;

  for (k = 0; k < nb; k++) {
    addr[k] = v & 0xff;
    v >>= 8;
  }
}

/* Read NB bytes at ADDR as a little endian integer and return it sign-extended from NB * 8 bits
   to 64 bits.  Zero is returned when NB is not positive.  For NB >= 8 no sign extension is done
   and the value is built from the first 8 bytes.

   The bytes are accumulated in an unsigned variable: left shifts of a signed value whose bits
   reach or pass the sign bit are undefined behavior in C, and so is a shift by 64 (which the
   sign extension would need for NB == 0).  */
static int64_t get_int64 (uint8_t *addr, int nb) { /* Little endian */
  uint64_t bits = 0;
  int i, sh;

  if (nb <= 0) return 0;
  for (i = nb - 1; i >= 0; i--) bits = (bits << 8) | addr[i];
  sh = (8 - nb) * 8;
  if (sh <= 0) return (int64_t) bits;
  /* Move the sign bit of the value to bit 63 and bring it back by an arithmetic shift.  */
  return (int64_t) (bits << sh) >> sh; /* make it signed */
}

/* Return OPCODE_MASK with the bits of MASK added.  It is asserted that none of the bits of MASK
   is already set in OPCODE_MASK.  */
static uint32_t check_and_set_mask (uint32_t opcode_mask, uint32_t mask) {
  uint32_t overlap = opcode_mask & mask;

  gen_assert (overlap == 0);
  return opcode_mask | mask;
}

/* Output machine code of INSN according to REPLACEMENT, the replacement
   string of the pattern matched by INSN.  REPLACEMENT consists of one
   or more machine insn descriptions separated by ';'.  Each
   description is a sequence of directives (blanks and tabs are
   ignored), each contributing a field of the machine insn:

     O<hex>        opcode of a 32-bit insn (bits 0-6)
     F<hex>        funct3 (bits 12-14)
     f<hex>        funct7 (bits 25-31)
     g<hex>        as f but the value must be less than 1 << 6
     o<hex>        opcode of a 16-bit insn (bits 0-1)
     a<hex> b<hex> c<hex>
                   funct3, funct4, funct6 of a 16-bit insn (placed at
                   bits 13, 12, and 10 correspondingly)
     d<hex> e<hex> 2-bit functs of a 16-bit insn (at bits 5 and 10)
     r<k><n>       register of insn operand N (0..2); <k> selects the
                   field: d - bits 7-11, s - bits 15-19, S - bits 20-24,
                   t - bits 2-6, u - 3-bit field at bit 7, v - 3-bit
                   field at bit 2
     h<k><hex>     as r but the hard register number is given directly
     ml, ms        base and 12-bit displacement of the memory operand
                   of a load (operand 1) or store (operand 0)
     mc<2|3>[s[s]] scaled unsigned displacement (and base for the form
                   without 's') of the memory operand of a 16-bit
                   load/store; 's' means sp-based, 'ss' sp-based store
     s, S          shift amount taken from operand 2; s<hex>/S<hex> - a
                   given shift amount; Sp - shift amount of a 16-bit insn
     i...          12/20-bit immediates: i<hex>, i-<hex>, ih/il (high
                   and low part of operand 1 value or reference),
                   iu/iu<hex> (upper 20 bits), i (operand 2 or 1)
     j, ju, jus    negated immediates: j - minus operand 2; ju - minus
                   operand 1 rounded up to 16; jus - the same in the
                   6-bit immediate form of a 16-bit insn
     k...          6-bit immediate of a 16-bit insn (k<hex>, k-<hex>, k,
                   ku, ks) or the 8-bit scaled one (kw)
     I             record a reference to a 64-bit constant (operand 1)
                   which add_consts places after the code
     T             immediate giving the distance to the 8-byte aligned
                   switch table output after the insn code
     l, L, U       record a label reference of type BRANCH, JAL, or
                   AUIPC correspondingly to be resolved later

   A description containing 'O' is output as 4 bytes, otherwise as 2
   bytes.  If JUMP_ADDRS is not NULL, its elements are recorded as the
   label reference targets instead of the insn labels.

   As a side effect an immediate ALLOCA size operand is rounded up to
   a multiple of 16 in INSN itself.  */
static void out_insn (gen_ctx_t gen_ctx, MIR_insn_t insn, const char *replacement,
                      void **jump_addrs) {
  MIR_context_t ctx = gen_ctx->ctx;
  const char *p, *insn_str;
  label_ref_t lr;
  const_ref_t cr;
  int switch_table_addr_p = FALSE;
  size_t nops = MIR_insn_nops (ctx, insn);

  if (insn->code == MIR_ALLOCA
      && (insn->ops[1].mode == MIR_OP_INT || insn->ops[1].mode == MIR_OP_UINT))
    insn->ops[1].u.u = (insn->ops[1].u.u + 15) & -16;
  for (insn_str = replacement;; insn_str = p + 1) { /* one machine insn per iteration */
    char ch, ch2, start_ch;
    /* Scaled displacement of a 16-bit load/store.  It is kept in a wide
       signed type so that the range assertions below see the real value
       (a char would truncate it before the check).  */
    int64_t d;
    uint32_t insn32 = 0, insn_mask = 0, el_mask = 0;
    int n, opcode = -1, funct3 = -1, funct7 = -1, rd = -1, rs1 = -1, rs2 = -1;
    int opcodec = -1, funct3c = -1, funct4c = -1, funct6c = -1, funct2c = -1, funct2bc = -1;
    int rs2m = -1, rdc = -1, rs2c = -1, uimm8c = -1;
    int shamt = -1, shamtc = -1, imm12, imm20, imm6c = 0, st_disp = 0;
    int unsign_disp4 = -1, unsign_disp8 = -1;
    int unsign_sp_disp4 = -1, unsign_sp_disp8 = -1;
    int unsign_sp_store_disp4 = -1, unsign_sp_store_disp8 = -1;
    int imm12_p = FALSE, imm20_p = FALSE, imm6c_p = FALSE, st_disp_p = FALSE;
    MIR_op_t op;
    int label_ref_num = -1;

    /* Pass 1: parse the directives and collect the field values.  */
    for (p = insn_str; (ch = *p) != '\0' && ch != ';'; p++) {
      if ((ch = *p) == 0 || ch == ';') break;
      el_mask = 0;
      switch ((start_ch = ch = *p)) {
      case ' ':
      case '\t': break;
      case 'O':
        p++;
        gen_assert (hex_value (*p) >= 0 && opcode < 0 && opcodec < 0);
        opcode = read_hex (&p);
        assert (opcode < (1 << 7));
        el_mask = 0x3f;
        break;
      case 'F':
        p++;
        gen_assert (hex_value (*p) >= 0 && funct3 < 0);
        funct3 = read_hex (&p);
        assert (funct3 < (1 << 3));
        el_mask = 0xf000;
        break;
      case 'f':
        p++;
        gen_assert (hex_value (*p) >= 0 && funct7 < 0);
        funct7 = read_hex (&p);
        assert (funct7 < (1 << 7));
        el_mask = 0xfe000000;
        break;
      case 'g':
        p++;
        gen_assert (hex_value (*p) >= 0 && funct7 < 0);
        funct7 = read_hex (&p);
        assert (funct7 < (1 << 6));
        el_mask = 0xfc000000;
        break;
      case 'o':
        p++;
        gen_assert (hex_value (*p) >= 0 && opcode < 0 && opcodec < 0);
        opcodec = read_hex (&p);
        assert (opcodec < 4); /* the opcode of a 16-bit insn occupies 2 bits */
        el_mask = 0x3;
        break;
      case 'a':
        gen_assert (opcodec >= 0);
        p++;
        gen_assert (hex_value (*p) >= 0 && opcode < 0);
        funct3c = read_hex (&p);
        assert (funct3c < 8);
        el_mask = 0xe000;
        break;
      case 'b':
        gen_assert (opcodec >= 0);
        p++;
        gen_assert (hex_value (*p) >= 0 && opcode < 0);
        funct4c = read_hex (&p);
        assert (funct4c < 16);
        el_mask = 0xf000;
        break;
      case 'c':
        gen_assert (opcodec >= 0);
        p++;
        gen_assert (hex_value (*p) >= 0 && opcode < 0);
        funct6c = read_hex (&p);
        assert (funct6c < 64);
        el_mask = 0xfc00;
        break;
      case 'd':
      case 'e':
        gen_assert (opcodec >= 0);
        p++;
        gen_assert (hex_value (*p) >= 0 && opcode < 0);
        n = read_hex (&p);
        assert (n < 4);
        if (start_ch == 'd') {
          funct2c = n;
          el_mask = 0x60;
        } else {
          funct2bc = n;
          el_mask = 0xc00;
        }
        break;
      case 'r':
      case 'h': {
        int reg;
        ch2 = *++p;
        gen_assert (ch2 == 'd' || ch2 == 's' || ch2 == 'S' || ch2 == 't' || ch2 == 'u'
                    || ch2 == 'v');
        ch = *++p;
        if (start_ch == 'h') { /* hard reg number is given in the template */
          reg = read_hex (&p);
        } else { /* reg is taken from the insn operand */
          gen_assert ('0' <= ch && ch <= '2' && ch - '0' < (int) nops);
          op = insn->ops[ch - '0'];
          gen_assert (op.mode == MIR_OP_VAR);
          reg = op.u.var;
        }
        /* Hard regs starting with F0_HARD_REG are encoded by their number
           relative to F0_HARD_REG.  */
        if (reg >= F0_HARD_REG) reg -= F0_HARD_REG;
        gen_assert (reg <= 31);
        if (ch2 == 'd') {
          rd = reg;
          el_mask = 0xf80;
        } else if (ch2 == 's') {
          rs1 = reg;
          el_mask = 0xf8000;
        } else if (ch2 == 'S') {
          rs2 = reg;
          el_mask = 0x1f00000;
        } else if (ch2 == 't') {
          rs2m = reg;
          el_mask = 0x7c;
        } else if (ch2 == 'u') {
          gen_assert (compressed_reg_p (reg, FALSE));
          rdc = reg - (reg <= R15_HARD_REG ? R8_HARD_REG : F8_HARD_REG);
          el_mask = 0x380;
        } else if (ch2 == 'v') {
          gen_assert (compressed_reg_p (reg, FALSE));
          rs2c = reg - (reg <= R15_HARD_REG ? R8_HARD_REG : F8_HARD_REG);
          el_mask = 0x1c;
        } else {
          gen_assert (FALSE);
        }
        break;
      }
      case 'm':
        ch = *++p;
        if (ch == 'c') { /* memory of a 16-bit load/store */
          op = insn->ops[0];
          if (op.mode == MIR_OP_VAR_MEM) { /* store */
            gen_assert (insn->ops[1].mode == MIR_OP_VAR);
          } else {
            op = insn->ops[1];
            gen_assert (op.mode == MIR_OP_VAR_MEM && insn->ops[0].mode == MIR_OP_VAR);
          }
          ch = *++p;
          gen_assert (ch == '2' || ch == '3' || ch == 'd');
          d = op.u.var_mem.disp >> (ch == '2' ? 2 : 3); /* scale by access size */
          if (*++p == 's') { /* sp-based form with 6-bit scaled displacement */
            gen_assert (0 <= d && d < (1 << 6) && op.u.var_mem.base == SP_HARD_REG);
            if (*++p == 's') { /* sp-based store */
              if (ch == '2') {
                unsign_sp_store_disp4 = d;
              } else {
                unsign_sp_store_disp8 = d;
              }
              el_mask = 0x1f80;
            } else { /* sp-based load */
              p--;
              if (ch == '2') {
                unsign_sp_disp4 = d;
              } else {
                unsign_sp_disp8 = d;
              }
              el_mask = 0x107c;
            }
          } else { /* reg-based form with 5-bit scaled displacement */
            gen_assert (compressed_reg_p (op.u.var_mem.base, TRUE));
            rdc = op.u.var_mem.base - R8_HARD_REG;
            p--;
            gen_assert (0 <= d && d < (1 << 5));
            if (ch == '2') {
              unsign_disp4 = d;
            } else {
              unsign_disp8 = d;
            }
            el_mask = 0x1fe0;
          }
        } else {
          if (ch == 's') { /* store */
            gen_assert (insn->ops[0].mode == MIR_OP_VAR_MEM);
            op = insn->ops[0];
            /* Displacement bits 5-11 and 0-4 are kept apart here so that
               the final shift by 7 places them at insn bits 25-31 and 7-11.  */
            st_disp = ((op.u.var_mem.disp << 13) & 0x01fc0000) | (op.u.var_mem.disp & 0x1f);
            el_mask = 0xfe000f80;
            st_disp_p = TRUE;
          } else { /* load */
            gen_assert (ch == 'l' && insn->ops[1].mode == MIR_OP_VAR_MEM);
            op = insn->ops[1];
            imm12 = op.u.var_mem.disp;
            imm12_p = TRUE;
            el_mask = 0xfff00000;
          }
          el_mask |= 0xf8000;
          rs1 = op.u.var_mem.base;
        }
        break;
      case 's':
      case 'S':
        el_mask = (start_ch == 's' ? 0x1f00000 : 0x3f00000);
        ch = *++p;
        if (hex_value (ch) >= 0) { /* s<hex> or S<hex> */
          shamt = read_hex (&p);
        } else if (start_ch == 'S' && ch == 'p') { /* Sp: shift of a 16-bit insn */
          op = insn->ops[2];
          gen_assert (op.mode == MIR_OP_INT || op.mode == MIR_OP_UINT);
          shamtc = op.u.i;
          el_mask = 0x107c;
          gen_assert (shamtc > 0);
        } else { /* s or S: shift amount is operand 2 */
          p--;
          op = insn->ops[2];
          gen_assert (op.mode == MIR_OP_INT || op.mode == MIR_OP_UINT);
          shamt = op.u.i;
          gen_assert (shamt >= 0);
        }
        break;
      case 'i':
        ch = *++p;
        if (ch == '-' || hex_value (ch) >= 0) { /* i[-]<hex> */
          int neg_p = FALSE;
          if (ch == '-') {
            ch = *++p;
            neg_p = TRUE;
          }
          gen_assert (hex_value (ch) >= 0);
          imm12 = read_hex (&p);
          if (neg_p) imm12 = -imm12;
          el_mask = 0xfff00000;
          imm12_p = TRUE;
        } else if (ch == 'h' || ch == 'l') { /* ih or il */
          int32_t v;
          op = insn->ops[1];
          if (op.mode != MIR_OP_REF) {
            v = (int32_t) op.u.i;
          } else if (op.u.ref->item_type == MIR_data_item && op.u.ref->u.data->name != NULL
                     && _MIR_reserved_ref_name_p (ctx, op.u.ref->u.data->name)) {
            v = (int32_t) (int64_t) op.u.ref->u.data->u.els;
          } else {
            v = (int32_t) (int64_t) op.u.ref->addr;
          }
          imm12 = (v << 20) >> 20; /* sign extended low 12 bits */
          el_mask = 0xfff00000;
          imm12_p = TRUE;
          if (ch == 'h') {
            /* High part compensating the sign of the low part.  */
            imm20 = (v - imm12) >> 12;
            el_mask = 0xfffff000;
            imm12_p = FALSE;
            imm20_p = TRUE;
          }
        } else if (ch == 'u') {
          ch = *++p;
          if (hex_value (ch) >= 0) { /* iu<hex> */
            imm20 = read_hex (&p);
            el_mask = 0xfffff000;
            imm20_p = TRUE;
          } else { /* iu */
            p--;
            op = insn->ops[1];
            gen_assert (op.mode == MIR_OP_INT || op.mode == MIR_OP_UINT);
            gen_assert ((op.u.i & 0xfff) == 0);
            imm20 = op.u.i >> 12;
            el_mask = 0xfffff000;
            imm20_p = TRUE;
          }
        } else { /* i */
          p--;
          imm12 = (nops > 2 && (insn->ops[2].mode == MIR_OP_INT || insn->ops[2].mode == MIR_OP_UINT)
                     ? insn->ops[2].u.i
                     : insn->ops[1].u.i);
          imm12_p = TRUE;
          el_mask = 0xfff00000;
        }
        break;
      case 'j':
        ch = *++p;
        if (ch == 'u') { /* ju */
          op = insn->ops[1];
          gen_assert (op.mode == MIR_OP_INT || op.mode == MIR_OP_UINT);
          ch = *++p;
          if (ch != 's') {
            p--;
            imm12 = -(op.u.i + 15) / 16 * 16;
            el_mask = 0xfff00000;
            imm12_p = TRUE;
          } else { /* jus */
            imm6c = -(op.u.i + 15) / 16;
            /* Permute the bits into the order used by the insn immediate
               field.  */
            imm6c = (imm6c & 0x20) | ((imm6c & 0x1) << 4) | ((imm6c & 0x4) << 1)
                    | ((imm6c & 0x18) >> 2) | ((imm6c & 0x2) >> 1);
            imm6c_p = TRUE;
            el_mask = 0x107c;
          }
        } else { /* j */
          p--;
          op = insn->ops[2];
          gen_assert (op.mode == MIR_OP_INT || op.mode == MIR_OP_UINT);
          imm12 = -op.u.i;
          el_mask = 0xfff00000;
          imm12_p = TRUE;
        }
        break;
      case 'I': {
        op = insn->ops[1];
        gen_assert (op.mode == MIR_OP_INT || op.mode == MIR_OP_UINT || op.mode == MIR_OP_REF);
        if (op.mode != MIR_OP_REF) {
          cr.val = op.u.u;
        } else if (op.u.ref->item_type == MIR_data_item && op.u.ref->u.data->name != NULL
                   && _MIR_reserved_ref_name_p (ctx, op.u.ref->u.data->name)) {
          cr.val = (uint64_t) op.u.ref->u.data->u.els;
        } else {
          cr.val = (uint64_t) op.u.ref->addr;
        }
        /* Remember where the insn referring to the constant starts.  */
        cr.const_addr_disp = VARR_LENGTH (uint8_t, result_code);
        VARR_PUSH (const_ref_t, const_refs, cr);
        break;
      }
      case 'T': {
        gen_assert (!switch_table_addr_p);
        /* Padding needed to align the current code length to 8 plus 16.  */
        imm12 = VARR_LENGTH (uint8_t, result_code) % 8;
        if (imm12 != 0) imm12 = 8 - imm12;
        imm12 += 16;
        el_mask = 0xfff00000;
        imm12_p = TRUE;
        switch_table_addr_p = TRUE;
        break;
      }
      case 'k':
        ch = *++p;
        imm6c_p = TRUE;
        el_mask = 0x107c;
        if (ch == '-' || hex_value (ch) >= 0) { /* k[-]<hex> */
          int neg_p = FALSE;
          if (ch == '-') {
            ch = *++p;
            neg_p = TRUE;
          }
          gen_assert (hex_value (ch) >= 0);
          imm6c = read_hex (&p);
          gen_assert (imm6c != 0 && -32 < imm6c && imm6c < 32);
          if (neg_p) imm6c = -imm6c;
        } else {
          imm6c = (nops > 2 && (insn->ops[2].mode == MIR_OP_INT || insn->ops[2].mode == MIR_OP_UINT)
                     ? insn->ops[2].u.i
                     : insn->ops[1].u.i);
          if (ch == 'u') { /* ku */
            imm6c >>= 12;
          } else if (ch == 's') { /* ks */
            imm6c >>= 4;
            imm6c = (imm6c & 0x20) | ((imm6c & 0x1) << 4) | ((imm6c & 0x4) << 1)
                    | ((imm6c & 0x18) >> 2) | ((imm6c & 0x2) >> 1);
          } else if (ch == 'w') { /* kw */
            uimm8c = imm6c >> 2;
            uimm8c = ((uimm8c & 0xc) << 4) | ((uimm8c & 0xf0) >> 2) | ((uimm8c & 0x1) << 1)
                     | ((uimm8c & 0x2) >> 1);
            el_mask = 0x1fe0;
            imm6c_p = FALSE;
          } else { /* k */
            p--;
          }
        }
        break;
      case 'l':
      case 'L':
      case 'U':
        /* The label/ref is operand 1 for calls and LADDR, otherwise
           operand 0.  */
        n = 0;
        if (insn->code == MIR_CALL || insn->code == MIR_INLINE || insn->code == MIR_LADDR) n = 1;
        op = insn->ops[n];
        gen_assert (op.mode == MIR_OP_LABEL || op.mode == MIR_OP_REF);
        lr.abs_addr_p = FALSE;
        lr.branch_type = start_ch == 'l' ? BRANCH : start_ch == 'L' ? JAL : AUIPC;
        lr.label_val_disp = 0; /* set after the loop when the insn start is known */
        if (jump_addrs == NULL)
          lr.u.label = op.u.label;
        else
          lr.u.jump_addr = jump_addrs[0];
        label_ref_num = VARR_LENGTH (label_ref_t, label_refs);
        VARR_PUSH (label_ref_t, label_refs, lr);
        el_mask = start_ch == 'l' ? 0xfe000f80 : 0xfffff000;
        break;
      default: gen_assert (FALSE);
      }
    }
    /* Pass 2: combine the collected fields into the insn code.  */
    if (opcode >= 0) insn32 |= opcode;
    if (funct3 >= 0) insn32 |= (funct3 << 12);
    if (funct7 >= 0) insn32 |= (funct7 << 25);
    if (opcodec >= 0) insn32 |= opcodec;
    if (funct3c >= 0) insn32 |= (funct3c << 13);
    if (funct4c >= 0) insn32 |= (funct4c << 12);
    if (funct6c >= 0) insn32 |= (funct6c << 10);
    if (funct2c >= 0) insn32 |= (funct2c << 5);
    if (funct2bc >= 0) insn32 |= (funct2bc << 10);
    if (rd >= 0) {
      gen_assert (rd <= 31);
      insn32 |= rd << 7;
    }
    if (rs1 >= 0) {
      gen_assert (rs1 <= 31);
      insn32 |= rs1 << 15;
    }
    if (rs2 >= 0) {
      gen_assert (rs2 <= 31);
      insn32 |= rs2 << 20;
    }
    if (rs2m >= 0) {
      gen_assert (rs2m <= 31);
      insn32 |= rs2m << 2;
    }
    if (rdc >= 0) {
      gen_assert (rdc <= 7); /* 3-bit register field */
      insn32 |= rdc << 7;
    }
    if (rs2c >= 0) {
      gen_assert (rs2c <= 7); /* 3-bit register field */
      insn32 |= rs2c << 2;
    }
    if (shamt >= 0) insn32 |= shamt << 20;
    if (shamtc >= 0) insn32 |= ((shamtc & 0x20) << 7) | ((shamtc & 0x1f) << 2);
    if (imm12_p) insn32 |= imm12 << 20;
    if (imm20_p) insn32 |= imm20 << 12;
    if (imm6c_p) insn32 |= ((imm6c & 0x20) << 7) | ((imm6c & 0x1f) << 2);
    if (uimm8c >= 0) insn32 |= uimm8c << 5;
    if (st_disp_p) insn32 |= st_disp << 7;
    /* Scaled displacements of 16-bit loads/stores are scattered over the
       insn bits in a form specific for each insn kind.  */
    if (unsign_disp4 >= 0)
      insn32
        |= ((unsign_disp4 & 0xe) << 9) | ((unsign_disp4 & 0x1) << 6) | ((unsign_disp4 & 0x10) << 1);
    if (unsign_disp8 >= 0) insn32 |= ((unsign_disp8 & 0x7) << 10) | ((unsign_disp8 & 0x18) << 2);
    if (unsign_sp_disp4 >= 0)
      insn32 |= ((unsign_sp_disp4 & 0x8) << 9) | ((unsign_sp_disp4 & 0x7) << 4)
                | ((unsign_sp_disp4 & 0x30) >> 2);
    if (unsign_sp_disp8 >= 0)
      insn32 |= ((unsign_sp_disp8 & 0x4) << 10) | ((unsign_sp_disp8 & 0x3) << 5)
                | ((unsign_sp_disp8 & 0x38) >> 1);
    if (unsign_sp_store_disp4 >= 0)
      insn32 |= ((unsign_sp_store_disp4 & 0xf) << 9) | ((unsign_sp_store_disp4 & 0x30) << 3);
    if (unsign_sp_store_disp8 >= 0)
      insn32 |= ((unsign_sp_store_disp8 & 0x7) << 10) | ((unsign_sp_store_disp8 & 0x38) << 4);
    /* NOTE(review): only the mask of the last parsed directive is
       accumulated here, so overlaps between directives of one insn are
       not detected -- confirm whether this is intended.  */
    insn_mask = check_and_set_mask (insn_mask, el_mask);
    /* The label reference is resolved relative to the insn start.  */
    if (label_ref_num >= 0)
      VARR_ADDR (label_ref_t, label_refs)
      [label_ref_num].label_val_disp = VARR_LENGTH (uint8_t, result_code);

    if (opcode >= 0) {
      put_uint64 (gen_ctx, insn32, 4); /* output the machine insn */
    } else {
      gen_assert ((insn32 & 0xffff0000) == 0 && (insn_mask & 0xffff0000) == 0);
      put_uint64 (gen_ctx, insn32, 2); /* output the machine insn */
    }
    if (*p == 0) break;
  }
  if (!switch_table_addr_p) return;
  /* Output the switch table: 8-byte aligned absolute label addresses
     which are filled later through the recorded label references.  */
  gen_assert (insn->code == MIR_SWITCH);
  if (VARR_LENGTH (uint8_t, result_code) % 8 != 0)
    put_uint64 (gen_ctx, 0, 8 - VARR_LENGTH (uint8_t, result_code) % 8);
  for (size_t i = 1; i < insn->nops; i++) {
    gen_assert (insn->ops[i].mode == MIR_OP_LABEL);
    lr.abs_addr_p = TRUE;
    lr.label_val_disp = VARR_LENGTH (uint8_t, result_code);
    if (jump_addrs == NULL)
      lr.u.label = insn->ops[i].u.label;
    else
      lr.u.jump_addr = jump_addrs[i - 1];
    VARR_PUSH (label_ref_t, label_refs, lr);
    put_uint64 (gen_ctx, 0, 8);
  }
}

/* Return TRUE if *OP_REF is a memory operand without an index register
   whose displacement fits into a signed 12-bit immediate.  For long
   double memory the displacement increased by 8 should fit too.  */
static int target_memory_ok_p (gen_ctx_t gen_ctx MIR_UNUSED, MIR_op_t *op_ref) {
  if (op_ref->mode != MIR_OP_VAR_MEM) return FALSE;
  if (op_ref->u.var_mem.index != MIR_NON_VAR) return FALSE;
  if (op_ref->u.var_mem.disp < -(1 << 11) || op_ref->u.var_mem.disp >= (1 << 11)) return FALSE;
  if (op_ref->u.var_mem.type == MIR_T_LD && op_ref->u.var_mem.disp + 8 >= (1 << 11))
    return FALSE;
  return TRUE;
}

/* Return TRUE if there is a pattern matching INSN.  */
static int target_insn_ok_p (gen_ctx_t gen_ctx, MIR_insn_t insn) {
  const char *replacement = find_insn_pattern_replacement (gen_ctx, insn);

  return replacement != NULL;
}

/* Insn bits occupied by the immediate of a B-format (conditional
   branch) insn: bits 25-31 and 7-11.  */
static const uint32_t b_imm_mask = ((0x7f << 25) | (0x1f << 7));

/* Return byte OFFSET of a branch target placed into the immediate bits
   of a B-format insn.  The least significant bit of OFFSET is dropped.
   Abort if the halved offset does not fit into 12 signed bits.  */
static uint32_t get_b_format_imm (int32_t offset) {
  int half = offset >> 1; /* scale */
  int hi7, lo5;

  gen_assert (-(1 << 11) <= half && half < (1 << 11));
  /* Insn bits 25-31: bit 11 of the scaled offset followed by its bits 4-9.  */
  hi7 = ((half >> 5) & 0x40) | ((half >> 4) & 0x3f);
  /* Insn bits 7-11: bits 0-3 of the scaled offset followed by its bit 10.  */
  lo5 = ((half & 0xf) << 1) | ((half >> 10) & 0x1);
  return (hi7 << 25) | (lo5 << 7);
}

/* Append all recorded 64-bit constants to the generated code and patch
   the two insns referring to each constant (at const_addr_disp and
   const_addr_disp + 4) with the distance from the first insn to the
   constant: the upper 20 bits go to the first insn and the lower 12
   bits go to the second one.  */
static void add_consts (gen_ctx_t gen_ctx) {
  size_t n;

  /* Setting up 64-bit const addresses */
  for (n = 0; n < VARR_LENGTH (const_ref_t, const_refs); n++) {
    const_ref_t cref = VARR_GET (const_ref_t, const_refs, n);
    /* The constant is placed at the current end of the code.  The code
       address is taken anew on each iteration as appending can move the
       code.  */
    size_t const_pos = VARR_LENGTH (uint8_t, result_code);
    uint8_t *code = VARR_ADDR (uint8_t, result_code);
    uint32_t disp, carry;

    gen_assert (const_pos > cref.const_addr_disp
                && const_pos - cref.const_addr_disp < (1l << 31));
    disp = (uint32_t) (const_pos - cref.const_addr_disp);
    /* The lower 12 bits are used as a signed value: compensate it in the
       upper part when bit 11 is set.  */
    carry = (disp & 0x800) << 1;
    *(uint32_t *) (&code[cref.const_addr_disp]) |= (disp + carry) & 0xfffff000;
    *(uint32_t *) (&code[cref.const_addr_disp + 4]) |= disp << 20;
    put_uint64 (gen_ctx, cref.val, 8);
  }
}

/* Replace each LDMOV insn of the current function by moves of two
   8-byte halves (at the memory displacement and the displacement plus
   8):
     o reg <- mem: two MOVs into the reg and the reg with the next number
     o mem <- reg: two MOVs from the reg and the reg with the next number
     o mem <- mem: two DMOV loads into the temporary double hard regs
       followed by two DMOV stores from them.  */
static void target_split_insns (gen_ctx_t gen_ctx) {
  MIR_context_t ctx = gen_ctx->ctx;
  MIR_insn_t insn = DLIST_HEAD (MIR_insn_t, curr_func_item->u.func->insns);
  MIR_insn_t next_insn;
  MIR_op_t lo, hi; /* memory of the low and high half */

  while (insn != NULL) {
    next_insn = DLIST_NEXT (MIR_insn_t, insn);
    if (insn->code == MIR_LDMOV) {
      if (insn->ops[0].mode == MIR_OP_VAR) { /* load into an integer reg pair */
        gen_assert (insn->ops[0].u.var + 1 < F0_HARD_REG
                    && insn->ops[1].mode == MIR_OP_VAR_MEM);
        lo = insn->ops[1];
        lo.u.var_mem.type = MIR_T_I64;
        hi = lo;
        hi.u.var_mem.disp += 8;
        /* Continue processing from the first new insn.  */
        next_insn = gen_mov (gen_ctx, insn, MIR_MOV, insn->ops[0], lo);
        gen_mov (gen_ctx, insn, MIR_MOV, _MIR_new_var_op (ctx, insn->ops[0].u.var + 1), hi);
      } else if (insn->ops[1].mode == MIR_OP_VAR) { /* store from an integer reg pair */
        gen_assert (insn->ops[1].u.var + 1 < F0_HARD_REG
                    && insn->ops[0].mode == MIR_OP_VAR_MEM);
        lo = insn->ops[0];
        lo.u.var_mem.type = MIR_T_I64;
        hi = lo;
        hi.u.var_mem.disp += 8;
        next_insn = gen_mov (gen_ctx, insn, MIR_MOV, lo, insn->ops[1]);
        gen_mov (gen_ctx, insn, MIR_MOV, hi, _MIR_new_var_op (ctx, insn->ops[1].u.var + 1));
      } else { /* memory to memory through the temporary double regs */
        gen_assert (insn->ops[0].mode == MIR_OP_VAR_MEM && insn->ops[1].mode == MIR_OP_VAR_MEM);
        lo = insn->ops[1];
        lo.u.var_mem.type = MIR_T_D;
        hi = lo;
        hi.u.var_mem.disp += 8;
        next_insn
          = gen_mov (gen_ctx, insn, MIR_DMOV, _MIR_new_var_op (ctx, TEMP_DOUBLE_HARD_REG1), lo);
        gen_mov (gen_ctx, insn, MIR_DMOV, _MIR_new_var_op (ctx, TEMP_DOUBLE_HARD_REG2), hi);
        lo = insn->ops[0];
        lo.u.var_mem.type = MIR_T_D;
        hi = lo;
        hi.u.var_mem.disp += 8;
        gen_mov (gen_ctx, insn, MIR_DMOV, lo, _MIR_new_var_op (ctx, TEMP_DOUBLE_HARD_REG1));
        gen_mov (gen_ctx, insn, MIR_DMOV, hi, _MIR_new_var_op (ctx, TEMP_DOUBLE_HARD_REG2));
      }
      gen_delete_insn (gen_ctx, insn);
    }
    insn = next_insn;
  }
}

/* Generate machine code of the current function.  Return the code
   address and set *LEN to the code length.  The code is kept in
   result_code, so the returned address refers to that array.

   The code consists of the insn code, out-of-line code for branches
   whose target is too far for a conditional branch, and the pool of
   64-bit constants.  Locations of absolute label addresses (switch
   table elements) are collected in abs_address_locs; at this stage
   they contain label displacements from the code start.  */
static uint8_t *target_translate (gen_ctx_t gen_ctx, size_t *len) {
  MIR_context_t ctx = gen_ctx->ctx;
  size_t i;
  MIR_insn_t insn;
  const char *replacement;

  gen_assert (curr_func_item->item_type == MIR_func_item);
  VARR_TRUNC (uint8_t, result_code, 0);
  VARR_TRUNC (label_ref_t, label_refs, 0);
  VARR_TRUNC (const_ref_t, const_refs, 0);
  VARR_TRUNC (uint64_t, abs_address_locs, 0);
  /* Output the insns.  Labels only get their displacement recorded and
     USE insns generate no code.  */
  for (insn = DLIST_HEAD (MIR_insn_t, curr_func_item->u.func->insns); insn != NULL;
       insn = DLIST_NEXT (MIR_insn_t, insn)) {
    if (insn->code == MIR_LABEL) {
      set_label_disp (gen_ctx, insn, VARR_LENGTH (uint8_t, result_code));
    } else if (insn->code != MIR_USE) {
      replacement = find_insn_pattern_replacement (gen_ctx, insn);
      if (replacement == NULL) {
        /* No pattern matches the insn: report it and stop.  */
        fprintf (stderr, "fatal failure in matching insn:");
        MIR_output_insn (ctx, stderr, insn, curr_func_item->u.func, TRUE);
        exit (1);
      } else {
        gen_assert (replacement != NULL);
        out_insn (gen_ctx, insn, replacement, NULL);
      }
    }
  }
  /* Setting up labels */
  for (i = 0; i < VARR_LENGTH (label_ref_t, label_refs); i++) {
    label_ref_t lr = VARR_GET (label_ref_t, label_refs, i);

    if (!lr.abs_addr_p) {
      /* Offset of the label relative to the insn referring to it.  */
      int64_t offset = (int64_t) get_label_disp (gen_ctx, lr.u.label) - (int64_t) lr.label_val_disp;
      uint32_t bin_insn;
      gen_assert ((offset & 0x1) == 0);
      if (lr.branch_type == BRANCH && (offset < -(1 << 12) || offset >= (1 << 12))) {
        /* BL:br L => BL:jmp NBL; ... NBL: br TL;jmp BL+4;TL:jmp L: */
        /* The three new insns are appended to the current code end (NBL).  */
        bin_insn = *(uint32_t *) (VARR_ADDR (uint8_t, result_code) + lr.label_val_disp);
        offset = (int64_t) VARR_LENGTH (uint8_t, result_code) - (int64_t) lr.label_val_disp;
        *(uint32_t *) (VARR_ADDR (uint8_t, result_code) + lr.label_val_disp)
          = 0x6f | get_j_format_imm (offset);
        bin_insn |= get_b_format_imm (8); /* the original branch now skips the next jump */
        put_uint64 (gen_ctx, bin_insn, 4);
        /* Jump back to the insn after the original branch.  */
        offset = (int64_t) lr.label_val_disp - (int64_t) VARR_LENGTH (uint8_t, result_code) + 4;
        bin_insn = 0x6f | get_j_format_imm (offset);
        put_uint64 (gen_ctx, bin_insn, 4);
        /* Jump to the original branch target.  */
        offset = (int64_t) get_label_disp (gen_ctx, lr.u.label)
                 - (int64_t) VARR_LENGTH (uint8_t, result_code);
        bin_insn = 0x6f | get_j_format_imm (offset);
        put_uint64 (gen_ctx, bin_insn, 4);
      } else if (lr.branch_type == AUIPC) {
        /* Upper 20 bits go to the first insn and lower 12 bits to the
           next one.  The upper part is incremented when bit 11 of the
           lower part is set.  */
        int hi = offset >> 12, low = offset & 0xfff;
        if ((low & 0x800) != 0) hi++;
        *(uint32_t *) (VARR_ADDR (uint8_t, result_code) + lr.label_val_disp) |= hi << 12;
        *(uint32_t *) (VARR_ADDR (uint8_t, result_code) + lr.label_val_disp + 4) |= low << 20;
      } else {
        /* A branch with a close enough target or a JAL.  */
        gen_assert (lr.branch_type != AUIPC_JALR);
        *(uint32_t *) (VARR_ADDR (uint8_t, result_code) + lr.label_val_disp)
          |= (lr.branch_type == BRANCH ? get_b_format_imm (offset) : get_j_format_imm (offset));
      }
    } else {
      /* Store the label displacement and remember the location for
         target_rebase which adds the code base to it.  */
      set_int64 (&VARR_ADDR (uint8_t, result_code)[lr.label_val_disp],
                 (int64_t) get_label_disp (gen_ctx, lr.u.label), 8);
      VARR_PUSH (uint64_t, abs_address_locs, lr.label_val_disp);
    }
  }
  while (VARR_LENGTH (uint8_t, result_code) % 8 != 0) /* Align the pool */
    VARR_PUSH (uint8_t, result_code, 0);
  add_consts (gen_ctx);
  while (VARR_LENGTH (uint8_t, result_code) % 16 != 0) /* Align the code end to 16 bytes */
    VARR_PUSH (uint8_t, result_code, 0);
  *len = VARR_LENGTH (uint8_t, result_code);
  return VARR_ADDR (uint8_t, result_code);
}

/* Finish the code placed at BASE: replace the 8-byte values at the
   locations collected in abs_address_locs by BASE plus the value
   currently stored there, and then set up the label references through
   gen_setup_lrefs.  */
static void target_rebase (gen_ctx_t gen_ctx, uint8_t *base) {
  size_t n, locs_num = VARR_LENGTH (uint64_t, abs_address_locs);

  VARR_TRUNC (MIR_code_reloc_t, relocs, 0);
  for (n = 0; n < locs_num; n++) {
    MIR_code_reloc_t reloc;

    reloc.offset = VARR_GET (uint64_t, abs_address_locs, n);
    reloc.value = base + get_int64 (base + reloc.offset, 8);
    VARR_PUSH (MIR_code_reloc_t, relocs, reloc);
  }
  _MIR_update_code_arr (gen_ctx->ctx, base, VARR_LENGTH (MIR_code_reloc_t, relocs),
                        VARR_ADDR (MIR_code_reloc_t, relocs));
  gen_setup_lrefs (gen_ctx, base);
}

/* Nothing is done for this target.  */
static void target_change_to_direct_calls (MIR_context_t ctx MIR_UNUSED) {}

/* Target dependent data of a BB version.  It describes the branch
   which should be redirected when the BB version code is ready (see
   target_redirect_bb_origin_branch).  */
struct target_bb_version {
  uint8_t *base;          /* base of the code containing the branch, NULL if unknown */
  label_ref_t branch_ref; /* label reference used for jump to this bb version */
};

/* Initialize target dependent DATA of a BB version: there is no branch
   to redirect yet.  */
static void target_init_bb_version_data (target_bb_version_t data) {
  data->base = NULL; /* we don't know origin branch */
}

/* Start generation of code for a BB: reset the number of nops to add
   at the end and empty the code, the label and constant references,
   and the absolute address locations.  */
static void target_bb_translate_start (gen_ctx_t gen_ctx) {
  add_nops = 0;
  VARR_TRUNC (uint8_t, result_code, 0);
  VARR_TRUNC (label_ref_t, label_refs, 0);
  VARR_TRUNC (const_ref_t, const_refs, 0);
  VARR_TRUNC (uint64_t, abs_address_locs, 0);
}

/* Output code of INSN of a BB using JUMP_ADDRS as targets of its label
   references.  Labels generate nothing.  After a branch insn remember
   how many nops should be added at the BB code end (1 for JMP, 3 for
   the other branches) to have room for a later branch conversion.  */
static void target_bb_insn_translate (gen_ctx_t gen_ctx, MIR_insn_t insn, void **jump_addrs) {
  const char *replacement;

  if (insn->code != MIR_LABEL) {
    replacement = find_insn_pattern_replacement (gen_ctx, insn);
    gen_assert (replacement != NULL);
    out_insn (gen_ctx, insn, replacement, jump_addrs);
    if (MIR_branch_code_p (insn->code)) {
      if (insn->code == MIR_JMP)
        add_nops = 1;
      else
        add_nops = 3;
    }
  }
}

/* Output a jump (the code of temp_jump) whose target is taken from
   JUMP_ADDRS, followed by a nop.  */
static void target_output_jump (gen_ctx_t gen_ctx, void **jump_addrs) {
  out_insn (gen_ctx, temp_jump, temp_jump_replacement, jump_addrs);
  put_uint64 (gen_ctx, TARGET_NOP, 4); /* add space for transformation to auipc;jalr */
}

/* Finish generation of code for a BB: add the nops requested by the
   last branch, align the code to 8 bytes, and add the constant pool.
   Return the code address and set *LEN to the code length.  */
static uint8_t *target_bb_translate_finish (gen_ctx_t gen_ctx, size_t *len) {
  int nops_left;

  /* add nops for conversion jmp->lui+jalr and br->jmp|lui+jalr */
  for (nops_left = add_nops; nops_left > 0; nops_left--) put_uint64 (gen_ctx, TARGET_NOP, 4);
  for (;;) { /* Align the pool */
    if (VARR_LENGTH (uint8_t, result_code) % 8 == 0) break;
    VARR_PUSH (uint8_t, result_code, 0);
  }
  add_consts (gen_ctx);
  *len = VARR_LENGTH (uint8_t, result_code);
  return VARR_ADDR (uint8_t, result_code);
}

/* Patch the code at BASE so that the insn described by relative label
   reference LR transfers control to (or, for AUIPC, refers to) ADDR.
   If ADDR is too far for the current insn form, the next form is used:
   a conditional branch is redirected to a jump placed 12 bytes after
   it, and a jump is replaced by the pair auipc and jalr.  LR is updated
   (branch_type and label_val_disp) to describe the new form.  The code
   is changed through _MIR_change_code.  Exit with an error message if
   LR is an absolute address reference or the offset is too big.  */
static void setup_rel (gen_ctx_t gen_ctx, label_ref_t *lr, uint8_t *base, void *addr) {
  MIR_context_t ctx = gen_ctx->ctx;
  int64_t offset = (int64_t) addr - (int64_t) (base + lr->label_val_disp);

  gen_assert ((offset & 0x1) == 0);
  /* check max 32-bit offset with possible branch conversion (see offset - 3): */
  if (lr->abs_addr_p || !(-(1l << 31) <= (offset / 2 - 3) && offset / 2 < (1l << 31))) {
    fprintf (stderr, "too big offset (%lld) in setup_rel", (long long) offset);
    exit (1);
  }
  /* ??? thread safe: */
  uint32_t *insn_ptr = (uint32_t *) (base + lr->label_val_disp), insn = *insn_ptr;
  if (lr->branch_type == BRANCH) {
    if (-(1 << 12) <= offset && offset < (1 << 12)) { /* a valid branch offset */
      insn = (insn & ~b_imm_mask) | get_b_format_imm (offset);
    } else {
      insn = (insn & ~b_imm_mask) | get_b_format_imm (12); /* skip next jump and nop */
      _MIR_change_code (ctx, (uint8_t *) insn_ptr, (uint8_t *) &insn, 4);
      /* Continue with a jump placed 3 insns (12 bytes) after the branch.  */
      insn_ptr += 3;
      lr->branch_type = JAL;
      lr->label_val_disp += 12;
      offset -= 12;
    }
  }
  if (lr->branch_type == JAL) {
    if (-(1 << 20) <= offset && offset < (1 << 20)) { /* a valid jal offset */
      insn = 0x6f | get_j_format_imm (offset);
    } else {
      lr->branch_type = AUIPC_JALR;
    }
  }
  if (lr->branch_type == AUIPC) {
    /* Upper 20 bits go to the insn and lower 12 bits to the next one.
       The upper part is incremented when bit 11 of the lower part is
       set.  NOTE(review): the new bits are or-ed into the existing insn
       bits -- this looks correct only when the immediate fields are
       still zero; confirm for repeated calls.  */
    int hi = offset >> 12, low = offset & 0xfff;
    if ((low & 0x800) != 0) hi++;
    insn |= hi << 12;
    _MIR_change_code (ctx, (uint8_t *) insn_ptr, (uint8_t *) &insn, 4);
    insn_ptr += 1;
    insn = *insn_ptr | (low << 20);
  } else if (lr->branch_type == AUIPC_JALR) {
    /* Compensate the upper part when bit 11 of the offset is set.  */
    uint32_t carry = (offset & 0x800) << 1;
    insn = 0x17 | (TEMP_INT_HARD_REG1 << 7)
           | (((uint32_t) offset + carry) & 0xfffff000); /* auipc t5 */
    _MIR_change_code (ctx, (uint8_t *) insn_ptr, (uint8_t *) &insn, 4);
    insn = 0x67 | (TEMP_INT_HARD_REG1 << 15) | ((offset & 0xfff) << 20); /* jr t5 */
    insn_ptr += 1;
  }
  /* Write the last (or the only) changed insn.  */
  _MIR_change_code (ctx, (uint8_t *) insn_ptr, (uint8_t *) &insn, 4);
}

/* Finish the BB code placed at BASE: set up all recorded label
   references using their jump addresses (absolute references get the
   8-byte address itself, relative ones are processed by setup_rel) and
   then replace the values at the locations collected in
   abs_address_locs by BASE plus the value stored there.  */
static void target_bb_rebase (gen_ctx_t gen_ctx, uint8_t *base) {
  MIR_context_t ctx = gen_ctx->ctx;
  size_t n;

  /* Setting up relative labels */
  for (n = 0; n < VARR_LENGTH (label_ref_t, label_refs); n++) {
    label_ref_t ref = VARR_GET (label_ref_t, label_refs, n); /* a copy of the reference */

    if (!ref.abs_addr_p) {
      setup_rel (gen_ctx, &ref, base, ref.u.jump_addr);
      continue;
    }
    _MIR_change_code (ctx, (uint8_t *) base + ref.label_val_disp, (uint8_t *) &ref.u.jump_addr, 8);
  }
  VARR_TRUNC (MIR_code_reloc_t, relocs, 0);
  for (n = 0; n < VARR_LENGTH (uint64_t, abs_address_locs); n++) {
    MIR_code_reloc_t reloc;

    reloc.offset = VARR_GET (uint64_t, abs_address_locs, n);
    reloc.value = base + get_int64 (base + reloc.offset, 8);
    VARR_PUSH (MIR_code_reloc_t, relocs, reloc);
  }
  _MIR_update_code_arr (ctx, base, VARR_LENGTH (MIR_code_reloc_t, relocs),
                        VARR_ADDR (MIR_code_reloc_t, relocs));
}

/* Remember for each non-NULL element of target_succ_bb_versions the
   label reference with the same index and BASE of the code containing
   it.  Nothing is done if the number of label references differs from
   the number of successor BB versions.  */
static void target_setup_succ_bb_version_data (gen_ctx_t gen_ctx, uint8_t *base) {
  size_t n, versions_num = VARR_LENGTH (target_bb_version_t, target_succ_bb_versions);

  /* We can have more one possible branch from original insn
     (e.g. SWITCH, FBNE).  If it is so, we will make jumps only
     through BB thunk. */
  if (VARR_LENGTH (label_ref_t, label_refs) != versions_num) return;
  for (n = 0; n < versions_num; n++) {
    target_bb_version_t data = VARR_GET (target_bb_version_t, target_succ_bb_versions, n);

    if (data != NULL) {
      data->branch_ref = VARR_GET (label_ref_t, label_refs, n);
      data->base = base;
    }
  }
}

/* Redirect the branch remembered in DATA to ADDR and forget the branch
   afterwards.  Nothing is done if no branch is remembered.  */
static void target_redirect_bb_origin_branch (gen_ctx_t gen_ctx, target_bb_version_t data,
                                              void *addr) {
  MIR_context_t ctx = gen_ctx->ctx;
  label_ref_t *ref;

  if (data->base == NULL) return;
  ref = &data->branch_ref;
  if (!ref->abs_addr_p) {
    setup_rel (gen_ctx, ref, data->base, addr);
  } else { /* the reference is an 8-byte address in the code */
    _MIR_change_code (ctx, (uint8_t *) data->base + ref->label_val_disp, (uint8_t *) &addr, 8);
  }
  data->base = NULL;
}

/* Initialize the target dependent part of the generator: allocate the
   target context, create the arrays used for code generation, register
   the unspec insns fmv.x.w and fmv.x.d, initialize the patterns, and
   create the jump insn (and find its replacement) used by
   target_output_jump.  */
static void target_init (gen_ctx_t gen_ctx) {
  MIR_context_t ctx = gen_ctx->ctx;
  check_hard_reg_alloc_order ();
  gen_ctx->target_ctx = gen_malloc (gen_ctx, sizeof (struct target_ctx));
  VARR_CREATE (uint8_t, result_code, 0);
  VARR_CREATE (label_ref_t, label_refs, 0);
  VARR_CREATE (const_ref_t, const_refs, 0);
  VARR_CREATE (uint64_t, abs_address_locs, 0);
  VARR_CREATE (MIR_code_reloc_t, relocs, 0);
  /* Both unspec insns have one I64 result and one (float or double)
     argument.  */
  MIR_type_t res = MIR_T_I64;
  MIR_var_t args1[] = {{MIR_T_F, "src", 0}};
  MIR_var_t args2[] = {{MIR_T_D, "src", 0}};
  _MIR_register_unspec_insn (ctx, FMVXW_CODE, "fmv.x.w", 1, &res, 1, FALSE, args1);
  _MIR_register_unspec_insn (ctx, FMVXD_CODE, "fmv.x.d", 1, &res, 1, FALSE, args2);
  patterns_init (gen_ctx);
  /* The pattern lookup needs the patterns to be initialized already.  */
  temp_jump = MIR_new_insn (ctx, MIR_JMP, MIR_new_label_op (ctx, NULL));
  temp_jump_replacement = find_insn_pattern_replacement (gen_ctx, temp_jump);
}

/* Free everything created by target_init: the pattern data, the
   temporary jump insn, the code generation arrays, and the target
   context itself.  */
static void target_finish (gen_ctx_t gen_ctx) {
  patterns_finish (gen_ctx);
  _MIR_free_insn (gen_ctx->ctx, temp_jump);
  VARR_DESTROY (uint8_t, result_code);
  VARR_DESTROY (label_ref_t, label_refs);
  VARR_DESTROY (const_ref_t, const_refs);
  VARR_DESTROY (uint64_t, abs_address_locs);
  VARR_DESTROY (MIR_code_reloc_t, relocs);
  free (gen_ctx->target_ctx);
  gen_ctx->target_ctx = NULL; /* the context must not be used after this */
}
